[SDFAB-1024] Modify TestON to verify persistent mastership

Additionally, improve the ONOS CLI driver

Change-Id: I92a6908fc5e76cdc6538dccf5845ee5a6e99662a
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params
new file mode 100644
index 0000000..2bad253
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params
@@ -0,0 +1,65 @@
+<PARAMS>
+    <testcases>1</testcases>
+
+    <GRAPH>
+        <nodeCluster>pairedleaves</nodeCluster>
+        <builds>20</builds>
+        <jobName>PMastership</jobName>
+    </GRAPH>
+
+    <persistent_setup>True</persistent_setup>
+
+    <kubernetes>
+        <appName>onos-classic</appName>
+        <namespace>tost</namespace>
+    </kubernetes>
+    <use_stern>True</use_stern>
+
+    <PMastership>
+
+        <PMastership_dataplane_fail>
+            <switch_to_kill>Leaf2</switch_to_kill> <!-- Component name of the switch to kill in CASE 1 -->
+            <k8s_switch_node>leaf2</k8s_switch_node>
+            <k8s_label>node-role.aetherproject.org</k8s_label>
+            <k8s_label_value_test>switch-test</k8s_label_value_test>
+            <k8s_label_value_normal>switch</k8s_label_value_normal>
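+            <!-- The k8s_label is set to the test value on the switch's k8s node to stop
+                 stratum from being redeployed there, and back to the normal value at the
+                 end of the test to restore it -->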
+        </PMastership_dataplane_fail>
+
+    </PMastership>
+
+    <TOPO>
+        <switchNum>2</switchNum>
+        <linkNum>2</linkNum>
+    </TOPO>
+
+    <ONOS_Logging>
+        <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+        <org.omecproject.up4>TRACE</org.omecproject.up4>
+    </ONOS_Logging>
+    <ONOS_Logging_Reset>
+        <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+        <org.omecproject.up4>INFO</org.omecproject.up4>
+    </ONOS_Logging_Reset>
+
+    <ENV>
+        <cellName>productionCell</cellName>
+        <cellApps>drivers,fpm,lldpprovider,hostprovider,netcfghostprovider,drivers.bmv2,org.opencord.fabric-tofino,pipelines.fabric,org.stratumproject.fabric-tna,drivers.barefoot,segmentrouting,up4</cellApps>
+    </ENV>
+
+    <DEPENDENCY>
+        <useCommonConf>False</useCommonConf>
+        <useCommonTopo>True</useCommonTopo>
+        <useBmv2>True</useBmv2>
+        <bmv2SwitchType>stratum</bmv2SwitchType>
+        <switchPrefix></switchPrefix>
+        <stratumRoot>~/stratum</stratumRoot>
+        <topology>trellis_fabric.py</topology>
+        <lib></lib>
+    </DEPENDENCY>
+
+    <SCALE>
+        <size>3</size>
+        <max>3</max>
+    </SCALE>
+
+</PARAMS>
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py
new file mode 100644
index 0000000..886806e
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py
@@ -0,0 +1,287 @@
+class PMastership:
+
+    def __init__(self):
+        self.default = ''
+
+    def CASE1(self, main):
+        main.case("PMastership Test")
+        """
+        Verify there are no pending flows and groups
+        Get flow and group counts
+        Verify that they are not 0
+        Get the master of leaf2 (see the params file for the config)
+        Verify that it has a master
+        Kill the leaf2 switch
+        Set a label on the switch's K8s node to prevent K8s from redeploying stratum
+        Verify there are no segmentrouting flows and groups after the failure
+        Verify that the master of leaf2 is still the same as before
+        Wait for the switch to be up again
+        Verify there are no pending flows and groups
+        """
+        try:
+            from tests.USECASE.SegmentRouting.dependencies.Testcaselib import \
+                Testcaselib as run
+            from tests.USECASE.SegmentRouting.SRStaging.dependencies.SRStagingTest import \
+                SRStagingTest
+            import time
+        except ImportError as e:
+            main.log.error("Import not found. Exiting the test")
+            main.log.error(e)
+            main.cleanAndExit()
+        # Retrieves the params of the test
+        n_switches = int(main.params["TOPO"]["switchNum"])
+        switch_to_kill = main.params["PMastership"]["PMastership_dataplane_fail"]["switch_to_kill"]
+        k8s_switch_node = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_switch_node"]
+        k8s_label = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_label"]
+        k8s_label_value_test = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_label_value_test"]
+        k8s_label_value_normal = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_label_value_normal"]
+        # Init the main components and variables
+        run.initTest(main)
+        main.log.info(main.Cluster.numCtrls)
+        main.Cluster.setRunningNode(3)
+        run.installOnos(main, skipPackage=True, cliSleep=5)
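+        # Handles used throughout the test: ONOS CLI commands go through the
+        # first active controller, while kubectl commands are run from the
+        # bench host using the cluster kubeconfig.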
+        onos_cli = main.Cluster.active(0).CLI
+        kubectl = main.Cluster.active(0).Bench
+        kubeconfig = main.Cluster.active(0).k8s.kubeConfig
+        namespace = main.params['kubernetes']['namespace']
+
+        main.step("Verify there are added flows")
+        initial_flows_count = onos_cli.checkFlowCount()
+        empty = main.TRUE if ( initial_flows_count == 0 ) else main.FALSE
+        utilities.assert_equal(
+            expect=main.FALSE,
+            actual=empty,
+            onpass="There are " + str(initial_flows_count) + " added flows",
+            onfail="There are no added flows",
+        )
+
+        main.step("Verify there are added groups")
+        initial_groups_count = onos_cli.checkGroupCount()
+        empty = main.TRUE if ( initial_groups_count == 0 ) else main.FALSE
+        utilities.assert_equal(
+            expect=main.FALSE,
+            actual=empty,
+            onpass="There are " + str(initial_groups_count) + " added groups",
+            onfail="There are no added groups",
+        )
+
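+        # utilities.retry keeps calling the check while it returns one of the
+        # values in the second argument ([False, None]), so this polls up to 20
+        # times for all flows to leave the PENDING state.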
+        no_pending_flows = utilities.retry(onos_cli.checkFlowsState,
+                                           [False, None],
+                                           kwargs={"isPENDING": False},
+                                           attempts=20,
+                                           getRetryingTime=True)
+
+        main.step("Verify there are no pending flows")
+        utilities.assert_equal(
+            expect=main.TRUE,
+            actual=no_pending_flows,
+            onpass="There are no pending flows",
+            onfail="There are pending flows",
+        )
+
+        no_pending_groups = utilities.retry(onos_cli.checkGroupsState,
+                                            [False, None],
+                                            kwargs={"isPENDING": False},
+                                            attempts=20,
+                                            getRetryingTime=True)
+
+        main.step("Verify there are no pending groups")
+        utilities.assert_equal(
+            expect=main.TRUE,
+            actual=no_pending_groups,
+            onpass="There are no pending groups",
+            onfail="There are pending groups",
+        )
+
+        main.step("Retrieving " + switch_to_kill + " master")
+        initial_master = onos_cli.getMaster("device:" + k8s_switch_node)
+        no_master = main.TRUE if ( initial_master is None ) else main.FALSE
+        utilities.assert_equal(
+            expect=main.FALSE,
+            actual=no_master,
+            onpass=str(initial_master) + " is the master of " + switch_to_kill,
+            onfail="There is no master for " + switch_to_kill,
+        )
+
+        main.step("Set label to switch k8s node and kill Stratum")
+        # The k8s node name corresponds to the switch name in lowercase
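+        # Moving the node to the "test" label value (presumably the selector
+        # used by the stratum DaemonSet) makes Kubernetes remove the stratum pod
+        # from this switch and keeps it from being rescheduled until the label
+        # is restored in the finally block below.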
+        utilities.assert_equal(
+            expect=main.TRUE,
+            actual=kubectl.kubectlSetLabel(
+                nodeName=k8s_switch_node,
+                label=k8s_label,
+                value=k8s_label_value_test,
+                kubeconfig=kubeconfig,
+                namespace=namespace,
+            ),
+            onpass="Label has been set correctly on node %s" % k8s_switch_node,
+            onfail="Label has not been set on node %s" % k8s_switch_node
+        )
+
+        # Execute the following in try/except/finally to be sure to restore the
+        # k8s label even in case of an unhandled exception.
+        try:
+            def checkNumberStratumPods(n_value):
+                pods = kubectl.kubectlGetPodNames(
+                    kubeconfig=kubeconfig,
+                    namespace=namespace,
+                    name="stratum"
+                )
+                main.log.info("PODS: " + str(pods))
+                return n_value == len(pods) if pods is not main.FALSE else False
+
+            # Wait for stratum pod to be removed from the switch
+            removed = utilities.retry(checkNumberStratumPods,
+                                      False,
+                                      args=[n_switches - 1],
+                                      attempts=50)
+            main.log.info("Stratum has been removed from the switch? %s" % removed)
+
+            sleepTime = 20
+            switch_component = getattr(main, switch_to_kill)
+            main.log.info("Sleeping %s seconds for ONOS to react" % sleepTime)
+            time.sleep(sleepTime)
+
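+            # Poll while ONOS still reports the switch as connected; 'available'
+            # should become False once the device is marked offline.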
+            available = utilities.retry(SRStagingTest.switchIsConnected,
+                                        True,
+                                        args=[switch_component],
+                                        attempts=300,
+                                        getRetryingTime=True)
+            main.log.info("Switch %s is available in ONOS? %s" % (
+                switch_to_kill, available))
+            utilities.assert_equal(
+                expect=True,
+                actual=not available and removed,
+                onpass="Stratum was removed from switch k8s node",
+                onfail="Stratum was not removed from switch k8s node"
+            )
+
+            main.step("Verify there are no segmentrouting flows after the failure")
+            raw_flows = onos_cli.flows(device="device:" + k8s_switch_node)
+            sr_flows = main.TRUE if "segmentrouting" in raw_flows else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=sr_flows,
+                onpass="There are no segmentrouting flows",
+                onfail="There are segmentrouting flows",
+            )
+
+            main.step("Verify there are no segmentrouting groups after the failure")
+            raw_groups = onos_cli.groups(device="device:" + k8s_switch_node)
+            sr_groups = main.TRUE if "segmentrouting" in raw_groups else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=sr_groups,
+                onpass="There are no segmentrouting groups",
+                onfail="There are segmentrouting groups",
+            )
+
+            main.step("Verify " + initial_master + " is still the master of " + switch_to_kill)
+            after_master = onos_cli.getMaster("device:" + k8s_switch_node)
+            no_master = main.TRUE if ( initial_master is None ) else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=no_master,
+                onpass=initial_master + " is the master of " + switch_to_kill,
+                onfail="There is no master for " + switch_to_kill,
+            )
+
+            same_master = main.TRUE if ( initial_master == after_master ) else main.FALSE
+            utilities.assert_equal(
+                expect=main.TRUE,
+                actual=same_master,
+                onpass=str(initial_master) + " is still the master of " + switch_to_kill,
+                onfail="Master for " + switch_to_kill + " is " + str(after_master),
+            )
+
+        except Exception as e:
+            main.log.error("Unhandled exception!")
+            main.log.error(e)
+        finally:
+            utilities.assert_equal(
+                expect=main.TRUE,
+                actual=kubectl.kubectlSetLabel(
+                    nodeName=k8s_switch_node,
+                    label=k8s_label,
+                    value=k8s_label_value_normal,
+                    kubeconfig=kubeconfig,
+                    namespace=namespace,
+                ),
+                onpass="Label has been set correctly on node %s" % k8s_switch_node,
+                onfail="Label has not been set on node %s" % k8s_switch_node
+            )
+
+            # Wait for stratum pod to be re-deployed on the switch
+            deployed = utilities.retry(checkNumberStratumPods,
+                                       False,
+                                       args=[n_switches],
+                                       attempts=50)
+            main.log.info("Stratum has been redeployed on the switch? %s" % deployed)
+
+            # Wait switch to be back in ONOS
+            available = utilities.retry(SRStagingTest.switchIsConnected,
+                                        False,
+                                        args=[switch_component],
+                                        sleep=2,
+                                        attempts=300,
+                                        getRetryingTime=True)
+            main.log.info("Switch %s is available in ONOS? %s" % (
+                switch_to_kill, available))
+            utilities.assert_equal(
+                expect=True,
+                actual=available and deployed,
+                onpass="Switch is back available in ONOS and stratum has been redeployed",
+                onfail="Switch is not available in ONOS, may influence subsequent tests!"
+            )
+
+        sleepTime = 10
+        main.log.info("Sleeping %s seconds for ONOS to react and assure flows/groups are ADDED" % sleepTime)
+        time.sleep(sleepTime)
+
+        main.step("Verify there are added flows after reboot")
+        after_flows_count = onos_cli.checkFlowCount()
+        empty = main.TRUE if ( after_flows_count == 0 ) else main.FALSE
+        utilities.assert_equal(
+            expect=main.FALSE,
+            actual=empty,
+            onpass="There are " + str(after_flows_count) + " added flows",
+            onfail="There are no added flows",
+        )
+
+        main.step("Verify there are added groups after reboot")
+        after_groups_count = onos_cli.checkGroupCount()
+        empty = main.TRUE if ( after_groups_count == 0 ) else main.FALSE
+        utilities.assert_equal(
+            expect=main.FALSE,
+            actual=empty,
+            onpass="There are " + str(after_groups_count) + " added groups",
+            onfail="There are no added groups",
+        )
+
+        no_pending_flows = utilities.retry(onos_cli.checkFlowsState,
+                                           [False, None],
+                                           kwargs={"isPENDING": False},
+                                           attempts=20,
+                                           getRetryingTime=True)
+
+        main.step("Verify there are no pending flows after reboot")
+        utilities.assert_equal(
+            expect=main.TRUE,
+            actual=no_pending_flows,
+            onpass="There are no pending flows",
+            onfail="There are pending flows",
+        )
+
+        no_pending_groups = utilities.retry(onos_cli.checkGroupsState,
+                                            [False, None],
+                                            kwargs={"isPENDING": False},
+                                            attempts=20,
+                                            getRetryingTime=True)
+
+        main.step("Verify there are no pending groups after reboot")
+        utilities.assert_equal(
+            expect=main.TRUE,
+            actual=no_pending_groups,
+            onpass="There are no pending groups",
+            onfail="There are pending groups",
+        )
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo
new file mode 100644
index 0000000..38b0af9
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo
@@ -0,0 +1,57 @@
+<TOPOLOGY>
+    <COMPONENT>
+        <ONOScell>
+            <host>localhost</host>  # ONOS "bench" machine
+            <user>jenkins</user>
+            <password></password>
+            <type>OnosClusterDriver</type>
+            <connect_order>50</connect_order>
+            <jump_host></jump_host>
+            <home>~/onos</home>   # defines where onos home is on the build machine. Defaults to "~/onos/" if empty.
+            <COMPONENTS>
+                <kubeConfig>~/.kube/dev-pairedleaves-tucson</kubeConfig>  # If set, will attempt to use this file for setting up port-forwarding
+                <useDocker>True</useDocker>  # Whether to use docker for ONOS nodes
+                <docker_prompt>\$</docker_prompt>
+                <cluster_name></cluster_name>  # Used as a prefix for cluster components. Defaults to 'ONOS'
+                <diff_clihost>True</diff_clihost> # Set to True if the CLI host is different from localhost. True or empty. OC# will be used if True.
+                <karaf_username>karaf</karaf_username>
+                <karaf_password>karaf</karaf_password>
+                <web_user>karaf</web_user>
+                <web_pass>karaf</web_pass>
+                <karafPrompt_username>karaf</karafPrompt_username>
+                <rest_port></rest_port>
+                <prompt></prompt>  # TODO: we technically need a few of these, one per component
+                <onos_home>~/onos/</onos_home>  # defines where onos home is on the target cell machine. Defaults to entry in "home" if empty.
+                <nodes> 1 </nodes>  # number of nodes in the cluster
+                <up4_port>51001</up4_port> # Port where the UP4 P4Runtime server is listening
+            </COMPONENTS>
+        </ONOScell>
+
+        <Leaf2>
+            <host>10.76.28.71</host>
+            <user>root</user>
+            <password>onl</password>
+            <type>StratumOSSwitchDriver</type>
+            <connect_order>10</connect_order>
+            <COMPONENTS>
+                <shortName>leaf2</shortName>
+                <port1>2</port1>
+                <link1>Host2</link1>
+                <onosConfigPath></onosConfigPath>
+                <onosConfigFile></onosConfigFile>
+            </COMPONENTS>
+        </Leaf2>
+
+        <!--  This component is not needed, but required to use the Testcaselib  -->
+        <NetworkBench>
+            <host>10.76.28.66</host>
+            <user>jenkins</user>
+            <password></password>
+            <type>NetworkDriver</type>
+            <connect_order>1</connect_order>
+            <COMPONENTS>
+            </COMPONENTS>
+        </NetworkBench>
+
+    </COMPONENT>
+</TOPOLOGY>
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/README.md b/TestON/tests/USECASE/SegmentRouting/PMastership/README.md
new file mode 100644
index 0000000..6919afc
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/README.md
@@ -0,0 +1,12 @@
+# PMastership System Tests
+
+Tests in this folder exercise persistent mastership by simulating a
+dataplane failure and verifying that flows and groups created by
+SegmentRouting are correctly purged, and that the device keeps the
+same master while it is unavailable.
+
+# Requirements to run PMastership tests
+
+There are no particular requirements, as the test mainly relies on the
+ONOS CLI driver and some utility functions to bring down a device and
+manipulate k8s resources.
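+
+The switch under test and the Kubernetes label used to stop and later
+redeploy stratum are configured in `PMastership.params`; a trimmed
+excerpt of the relevant block:
+
+```xml
+<PMastership_dataplane_fail>
+    <switch_to_kill>Leaf2</switch_to_kill>
+    <k8s_switch_node>leaf2</k8s_switch_node>
+    <k8s_label>node-role.aetherproject.org</k8s_label>
+    <k8s_label_value_test>switch-test</k8s_label_value_test>
+    <k8s_label_value_normal>switch</k8s_label_value_normal>
+</PMastership_dataplane_fail>
+```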
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py b/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py