Merge "Created SCPFmastershipFailoverLat test"

commit: 63223c65cad6ebf8b1721c985f7a35b35fc9de7c [log] [tgz]
author: You Wang <you@opennetworking.org> Fri Aug 18 19:21:24 2017 +0000
committer: Gerrit Code Review <gerrit@onosproject.org> Fri Aug 18 19:21:24 2017 +0000
tree: f0234bed30226ec69015d5502769513ccd905f88
parent: 40a1909abbc0f8318cfd2518e0b4b1d5cd246a32 [diff]
parent: 818bc7c1ce537dd9230859a0eb39724c1e93050b [diff]
diff --git a/TestON/drivers/common/cli/onosclidriver.py b/TestON/drivers/common/cli/onosclidriver.py
index 9636aa4..0190154 100755
--- a/TestON/drivers/common/cli/onosclidriver.py
+++ b/TestON/drivers/common/cli/onosclidriver.py

@@ -34,6 +34,7 @@
 jhall@onlab.us
 andrew@onlab.us
 shreya@onlab.us
+jeremyr@opennetworking.org
 """
 import pexpect
 import re
@@ -5659,3 +5660,51 @@
         except Exception:
             main.log.exception( self.name + ": Uncaught exception!" )
             main.cleanAndExit()
+
+    def events( self, args='-a' ):
+        """
+        Description: Returns events -a command output
+        Optional:
+            add other arguments
+        """
+        try:
+            cmdStr = "events"
+            if args:
+                cmdStr += " " + args
+            handle = self.sendline( cmdStr )
+            assert handle is not None, "Error in sendline"
+            assert "Command not found:" not in handle, handle
+            return handle
+        except AssertionError:
+            main.log.exception( "" )
+            return None
+        except TypeError:
+            main.log.exception( self.name + ": Object not as expected" )
+            return None
+        except pexpect.EOF:
+            main.log.error( self.name + ": EOF exception found" )
+            main.log.error( self.name + ":    " + self.handle.before )
+            main.cleanAndExit()
+        except Exception:
+            main.log.exception( self.name + ": Uncaught exception!" )
+            main.cleanAndExit()
+
+    def getMaster( self, deviceID ):
+        """
+            Description: Obtains current master using "roles" command for a specific deviceID
+        """
+        try:
+            return str( self.getRole( deviceID )[ 'master' ] )
+        except AssertionError:
+            main.log.exception( "" )
+            return None
+        except TypeError:
+            main.log.exception( self.name + ": Object not as expected" )
+            return None
+        except pexpect.EOF:
+            main.log.error( self.name + ": EOF exception found" )
+            main.log.error( self.name + ":    " + self.handle.before )
+            main.cleanAndExit()
+        except Exception:
+            main.log.exception( self.name + ": Uncaught exception!" )
+            main.cleanAndExit()
\ No newline at end of file

diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/README b/TestON/tests/SCPF/SCPFmastershipFailoverLat/README
new file mode 100644
index 0000000..f992efd
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/README

@@ -0,0 +1,2 @@
+The objective of SCPFmastershipFailoverLat is to test the latency for an ONOS node being killed, change of mastership, and role request as ONOS scales.
+For more details, check out the wiki documentation: https://wiki.onosproject.org/x/O4De
\ No newline at end of file

diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.params b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.params
new file mode 100644
index 0000000..1b4fc02
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.params

@@ -0,0 +1,71 @@
+<PARAMS>
+    <testcases>0,[1,2,3]*3</testcases>
+
+    <SCALE>3,5,7</SCALE>
+    <max>7</max>
+
+    <ENV>
+        <cellName>topo_perf_test</cellName>
+        <cellApps>drivers,events,openflow</cellApps>
+    </ENV>
+
+    <MN>
+        <ip1>OCN</ip1>
+    </MN>
+
+    <GIT>
+        <pull>False</pull>
+        <branch>master</branch>
+    </GIT>
+
+    <TSHARK>
+        <tsharkResultPath>/tmp/tshark_mastership_failover</tsharkResultPath>
+        <ofpRoleRequest>OF 1.3 90 of_role_request</ofpRoleRequest>
+    </TSHARK>
+
+    <SLEEP>
+        <recovery>5</recovery>
+        <failover>10</failover>
+        <mininet>5</mininet>
+    </SLEEP>
+
+    <TIMEOUT>
+        <timeout>60</timeout>
+    </TIMEOUT>
+
+    <TEST>
+        #'on' or 'off' debug mode.
+        #If on, logging will be more verbose and
+        #tshark pcap will be enabled
+        #pcap file located at /tmp/'capture_name'
+        <debug>True</debug>
+
+        #Number of iterations to ignore initially
+        <warmUp>5</warmUp>
+
+        #Number of times to iterate each case
+        <sampleSize>20</sampleSize>
+
+        # Percent of data that can be invalid per sample size iteration.
+        # Any more than this will cause the test to fail and stop. Must be a float between 0 and 1.
+        <failPercent>0.25</failPercent>
+
+   </TEST>
+
+    <DATABASE>
+        <dbName>/tmp/mastershipFailoverLatDB</dbName>
+    </DATABASE>
+
+    <SWITCH>
+        <id>of:0000000000000001</id>
+    </SWITCH>
+
+    <TOPOLOGY>
+        <switchCount>5</switchCount>
+        <type>linear</type>
+    </TOPOLOGY>
+
+    <KILL>
+        <nodeNum>0</nodeNum>
+    </KILL>
+</PARAMS>

diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py
new file mode 100644
index 0000000..ecbf5cd
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py

@@ -0,0 +1,314 @@
+"""
+Copyright 2017 Open Networking Foundation (ONF)
+
+Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
+the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
+or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
+
+    TestON is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    TestON is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+class SCPFmastershipFailoverLat:
+    def __init__( self ):
+        self.default = ''
+
+    def CASE0( self, main ):
+        import os
+        import imp
+        '''
+        - GIT
+        - BUILDING ONOS
+            Pull specific ONOS branch, then Build ONOS ono ONOS Bench.
+            This step is usually skipped. Because in a Jenkins driven automated
+            test env. We want Jenkins jobs to pull&build for flexibility to handle
+            different versions of ONOS.
+        - Construct tests variables
+        '''
+        try:
+            from tests.dependencies.ONOSSetup import ONOSSetup
+            main.testSetUp = ONOSSetup()
+        except ImportError:
+            main.log.error( "ONOSSetup not found. exiting the test" )
+            main.exit()
+        main.testSetUp.envSetupDescription()
+        stepResult = main.FALSE
+        try:
+            main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
+            main.cellName = main.params[ 'ENV' ][ 'cellName' ]
+            main.apps = main.params[ 'ENV' ][ 'cellApps' ]
+            main.scale = ( main.params[ 'SCALE' ] ).split( "," )
+            main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
+            main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
+            main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
+            main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
+            main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
+            main.maxScale = int( main.params[ 'max' ] )
+            main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
+            main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
+            main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
+            main.debug = main.params[ 'TEST' ][ 'debug' ]
+            main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
+            main.switchID = main.params[ 'SWITCH' ][ 'id' ]
+            main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
+            main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
+            main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
+            main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )
+
+            if main.debug == "True":
+                main.debug = True
+            else:
+                main.debug = False
+
+            stepResult = main.testSetUp.envSetup()
+            main.log.info( "Create Database file " + main.dbFileName )
+            resultsDB = open( main.dbFileName, "w+" )
+            resultsDB.close()
+
+        except Exception as e:
+            main.testSetUp.envSetupException( e )
+        main.testSetUp.evnSetupConclusion( stepResult )
+
+
+
+    def CASE1( self, main ):
+        # Clean up test environment and set up
+        import time
+        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
+                                  cellName=main.cellName, killRemoveMax=False )
+        try:
+            from tests.dependencies.utils import Utils
+        except ImportError:
+            main.log.error( "Utils not found exiting the test" )
+            main.exit()
+        try:
+            main.Utils
+        except ( NameError, AttributeError ):
+            main.Utils = Utils()
+        main.Utils.mininetCleanup( main.Mininet1 )
+
+        main.step( "Starting up Mininet from command." )
+
+        mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
+        for ctrl in main.Cluster.active():
+            mnCmd += " --controller remote,ip=" + ctrl.ipAddress
+
+        stepResult = main.Mininet1.startNet( mnCmd=mnCmd )
+
+        utilities.assert_equals( expect=main.TRUE,
+                                 actual=stepResult,
+                                 onpass="Mininet was set up correctly.",
+                                 onfail="Mininet was NOT set up correctly." )
+
+
+    def CASE2( self, main ):
+        """
+        Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
+        (tshark time), then bring the node back up.
+        """
+        import time
+        import datetime
+        import numpy
+        from tests.HA.dependencies.HA import HA
+
+        main.HA = HA()
+
+        main.latencyData = { 'kill_to_deactivation' : [],
+                             'deactivation_to_role_request' : [] }
+
+        main.failCounter = 0
+        passingResult = True
+        criticalError = False
+
+        main.step( "Gathering data starting with " + str( main.warmUp ) + " warm ups and a sample size of " + str( main.sampleSize ) )
+
+        for iteration in range( 0, main.sampleSize + main.warmUp ):
+
+            main.log.info( "==========================================" )
+            main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )
+
+            ip_address = main.Cluster.active( 0 ).ipAddress
+            strNodeNumToKill = str( main.nodeNumToKill )
+
+            main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
+            main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )
+
+            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
+            time.sleep( main.recoverySleep )
+            mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address
+
+            if not mastershipCheck:
+                main.log.warn( "Mastership is NOT as expected." )
+
+            with open( main.tsharkResultPath, "w" ) as tshark:
+                tshark.write( "" )
+            main.log.info( "Starting tshark capture." )
+            main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
+            time1 = time.time() * 1000.0
+
+            # Kill an ONOS node
+            main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
+            killresult = main.ONOSbench.onosKill( ip_address )
+            main.Cluster.runningNodes[ main.nodeNumToKill ].active = False
+
+            # Stop an ONOS node
+            main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
+            stopresult = main.ONOSbench.onosStop( ip_address )
+
+            killStopResult = stopresult == killresult and True
+
+            if not killStopResult:
+                main.log.error( "ONOS node was NOT successfully stopped and killed." )
+                criticalError = True
+
+            time.sleep( main.failoverSleep )
+
+            # Stop tshark and get times
+            main.log.info( "Stopping tshark." )
+            main.ONOSbench.tsharkStop()
+
+            masterChangedLats = []
+            instanceDeactivatedLats = []
+
+            main.log.info( "Obtaining latencies from 'events' output." )
+            for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
+                eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
+                for line in reversed( eventOutput ):
+                    if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
+                        deactivateTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
+                        instanceDeactivatedLats.append( deactivateTime - time1 )
+                    elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
+                        changedTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
+                        masterChangedLats.append( changedTime - time1 )
+                    if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
+                        break
+
+            instanceDeactivatedLats.sort()
+            instanceDeactivated = instanceDeactivatedLats[ 0 ]
+
+            eventLatCheck = True if masterChangedLats and instanceDeactivated else False
+            if not eventLatCheck:
+                main.log.warn( "Latencies were NOT obtained from 'events' successfully." )
+
+            main.log.info( "Obtain latency from tshark output." )
+            tsharkLatCheck = True
+            with open( main.tsharkResultPath, "r" ) as resultFile:
+                resultText = resultFile.readline()
+                main.log.info( "Capture result: " + resultText )
+                resultText = resultText.split()
+                if len( resultText ) > 1:
+                    roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
+                    resultFile.close()
+                else:
+                    main.log.error( "Tshark output file is NOT as expected." )
+                    tsharkLatCheck = False
+            if not tsharkLatCheck:
+                main.log.warn( "Latency was NOT obtained from tshark successfully." )
+
+            validDataCheck = False
+            if tsharkLatCheck:
+                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
+                main.log.info( "roleRequestLat - instanceDeactivated: " + str( roleRequestLat - instanceDeactivated ) )
+                if iteration >= main.warmUp:
+                    main.log.info( "Verifying that the data are valid." )  # Don't record data during a warm-up
+                    validDataCheck = roleRequestLat - instanceDeactivated >= 0 and \
+                                     instanceDeactivated >= 0
+                    if not validDataCheck:
+                        main.log.warn( "Data are NOT valid." )
+
+                    if eventLatCheck and tsharkLatCheck and validDataCheck:
+                        main.log.info( "Saving data..." )
+                        main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
+                        main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )
+
+
+            # Restart ONOS node
+            main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
+            startResult = main.ONOSbench.onosStart( ip_address )
+
+            if not startResult:
+                main.log.error( "ONOS nodes NOT successfully started." )
+                criticalError = True
+
+            # Check if ONOS is up yet
+            main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
+            upResult = main.ONOSbench.isup( ip_address )
+
+            if not upResult:
+                main.log.error( "ONOS did NOT successfully restart." )
+                criticalError = True
+
+            # Restart CLI
+            main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
+            cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
+            main.Cluster.runningNodes[ main.nodeNumToKill ] .active = True
+
+            if not cliResult:
+                main.log.error( "ONOS CLI did NOT successfully restart." )
+                criticalError = True
+
+            main.log.info( "Checking ONOS nodes." )
+            nodeResults = utilities.retry( main.HA.nodesCheck,
+                                           False,
+                                           args=[ main.Cluster.active() ],
+                                           sleep=1,
+                                           attempts=3 )
+
+            if not nodeResults:
+                main.log.error( "Nodes check NOT successful." )
+                criticalError = True
+
+            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
+            time.sleep( main.recoverySleep )
+
+            if not ( mastershipCheck and
+                     eventLatCheck and
+                     tsharkLatCheck and
+                     validDataCheck ) and \
+                     iteration >= main.warmUp:
+                main.failCounter += 1
+                main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
+            if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
+                main.log.error( str( main.failPercent * 100 ) + "% or more of data is invalid, or a critical error has occurred." )
+                passingResult = False
+                break
+
+        utilities.assert_equals( expect=True, actual=passingResult,
+                                 onpass="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering was successful.",
+                                 onfail="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering FAILED. Stopping test.")
+        if not passingResult:
+            main.cleanAndExit()
+
+
+    def CASE3( self, main ):
+        """
+        Write results to database file.
+        Omit this case if you don't want to write to database.
+        """
+        import numpy
+        result = { 'avg' : {}, 'stddev' : {} }
+
+        for i in main.latencyData:
+            result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
+            result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )
+
+        main.log.info( "result: " + str( result ) )
+        with open( main.dbFileName, "a" ) as dbFile:
+            strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
+            strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
+            for i in result:
+                for j in result[ i ]:
+                    strToWrite += "," + str( result[ i ][ j ] )
+            strToWrite += "\n"
+            dbFile.write( strToWrite )
+            dbFile.close()

diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.topo b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.topo
new file mode 100644
index 0000000..72c1715
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.topo

@@ -0,0 +1,42 @@
+<TOPOLOGY>
+
+    <COMPONENT>
+
+        <ONOScell>
+            <host>localhost</host>  # ONOS "bench" machine
+            <user>sdn</user>
+            <password>rocks</password>
+            <type>OnosClusterDriver</type>
+            <connect_order>1</connect_order>
+            <COMPONENTS>
+                <cluster_name></cluster_name>  # Used as a prefix for cluster components. Defaults to 'ONOS'
+                <diff_clihost></diff_clihost>  # if it has different host other than localhost for CLI. True or empty. OC# will be used if True.
+                <karaf_username></karaf_username>
+                <karaf_password></karaf_password>
+                <web_user></web_user>
+                <web_pass></web_pass>
+                <rest_port></rest_port>
+                <prompt></prompt>
+                <onos_home></onos_home>  # defines where onos home is
+                <nodes> 7 </nodes>  # number of nodes in the cluster
+            </COMPONENTS>
+        </ONOScell>
+
+        <Mininet1>
+            <host>OCN</host>
+            <user>sdn</user>
+            <password>rocks</password>
+            <type>MininetCliDriver</type>
+            <connect_order>2</connect_order>
+            <COMPONENTS>
+                <arg1> --custom ~/mininet/custom/topo-perf-2sw.py </arg1>
+                <arg2> --topo mytopo</arg2>
+                <arg3> --switch ovsk,protocols=OpenFlow13</arg3>
+                <controller> remote </controller>
+                <prompt></prompt>
+            </COMPONENTS>
+        </Mininet1>
+
+    </COMPONENT>
+
+</TOPOLOGY>

diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/__init__.py b/TestON/tests/SCPF/SCPFmastershipFailoverLat/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/__init__.py
commit	63223c65cad6ebf8b1721c985f7a35b35fc9de7c	[log] [tgz]
author	You Wang <you@opennetworking.org>	Fri Aug 18 19:21:24 2017 +0000
committer	Gerrit Code Review <gerrit@onosproject.org>	Fri Aug 18 19:21:24 2017 +0000
tree	f0234bed30226ec69015d5502769513ccd905f88
parent	40a1909abbc0f8318cfd2518e0b4b1d5cd246a32 [diff]
parent	818bc7c1ce537dd9230859a0eb39724c1e93050b [diff]