Created SCPFmastershipFailoverLat test
Change-Id: I9610a17a8d4f0ccb6a83b27679a9d20efed93f25
diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py
new file mode 100644
index 0000000..ecbf5cd
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py
@@ -0,0 +1,314 @@
+"""
+Copyright 2017 Open Networking Foundation (ONF)
+
+Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
+the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
+or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
+
+ TestON is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TestON is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with TestON. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+class SCPFmastershipFailoverLat:
+ def __init__( self ):
+ self.default = ''
+
+    def CASE0( self, main ):
+        """
+        - GIT
+        - BUILDING ONOS
+        Pull a specific ONOS branch, then build ONOS on ONOS Bench.
+        This step is usually skipped: in a Jenkins-driven automated test
+        environment, we want the Jenkins jobs to pull and build so that we can
+        flexibly handle different versions of ONOS.
+        - Construct test variables
+        """
+        import os
+        import imp
+ try:
+ from tests.dependencies.ONOSSetup import ONOSSetup
+ main.testSetUp = ONOSSetup()
+ except ImportError:
+ main.log.error( "ONOSSetup not found. exiting the test" )
+ main.exit()
+ main.testSetUp.envSetupDescription()
+ stepResult = main.FALSE
+ try:
+ main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
+ main.cellName = main.params[ 'ENV' ][ 'cellName' ]
+ main.apps = main.params[ 'ENV' ][ 'cellApps' ]
+ main.scale = ( main.params[ 'SCALE' ] ).split( "," )
+ main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
+ main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
+ main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
+ main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
+ main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
+ main.maxScale = int( main.params[ 'max' ] )
+ main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
+ main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
+ main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
+ main.debug = main.params[ 'TEST' ][ 'debug' ]
+ main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
+ main.switchID = main.params[ 'SWITCH' ][ 'id' ]
+ main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
+ main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
+ main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
+ main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )
+
+            main.debug = ( main.debug == "True" )
+
+ stepResult = main.testSetUp.envSetup()
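+            # Create ( or truncate ) the database file that CASE3 appends results to.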
+ main.log.info( "Create Database file " + main.dbFileName )
+ resultsDB = open( main.dbFileName, "w+" )
+ resultsDB.close()
+
+ except Exception as e:
+ main.testSetUp.envSetupException( e )
+ main.testSetUp.evnSetupConclusion( stepResult )
+
+
+
+ def CASE1( self, main ):
+ # Clean up test environment and set up
+ import time
+ main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
+ cellName=main.cellName, killRemoveMax=False )
+ try:
+ from tests.dependencies.utils import Utils
+ except ImportError:
+ main.log.error( "Utils not found exiting the test" )
+ main.exit()
+ try:
+ main.Utils
+ except ( NameError, AttributeError ):
+ main.Utils = Utils()
+ main.Utils.mininetCleanup( main.Mininet1 )
+
+ main.step( "Starting up Mininet from command." )
+
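+        # Build the Mininet command line: the configured topology plus one
+        # remote controller entry per active ONOS node.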
+ mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
+ for ctrl in main.Cluster.active():
+ mnCmd += " --controller remote,ip=" + ctrl.ipAddress
+
+ stepResult = main.Mininet1.startNet( mnCmd=mnCmd )
+
+ utilities.assert_equals( expect=main.TRUE,
+ actual=stepResult,
+ onpass="Mininet was set up correctly.",
+ onfail="Mininet was NOT set up correctly." )
+
+
+ def CASE2( self, main ):
+ """
+        Kill an ONOS node, measure the latencies for INSTANCE_DEACTIVATED, MASTER_CHANGED,
+        and the OpenFlow role request ( tshark time ), then bring the node back up.
+ """
+ import time
+ import datetime
+ import numpy
+ from tests.HA.dependencies.HA import HA
+
+ main.HA = HA()
+
+ main.latencyData = { 'kill_to_deactivation' : [],
+ 'deactivation_to_role_request' : [] }
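+        # Two latencies are collected per iteration: node kill to INSTANCE_DEACTIVATED
+        # ( from the ONOS 'events' log ) and INSTANCE_DEACTIVATED to the OpenFlow
+        # role request seen by tshark.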
+
+ main.failCounter = 0
+ passingResult = True
+ criticalError = False
+
+ main.step( "Gathering data starting with " + str( main.warmUp ) + " warm ups and a sample size of " + str( main.sampleSize ) )
+
+ for iteration in range( 0, main.sampleSize + main.warmUp ):
+
+ main.log.info( "==========================================" )
+ main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )
+
+ ip_address = main.Cluster.active( 0 ).ipAddress
+ strNodeNumToKill = str( main.nodeNumToKill )
+
+ main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
+ main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )
+
+ main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
+ time.sleep( main.recoverySleep )
+ mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address
+
+ if not mastershipCheck:
+ main.log.warn( "Mastership is NOT as expected." )
+
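+            # Truncate any previous capture output, then start tshark in the
+            # background, grepping for the configured OpenFlow role-request filter.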
+ with open( main.tsharkResultPath, "w" ) as tshark:
+ tshark.write( "" )
+ main.log.info( "Starting tshark capture." )
+ main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
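+            # Record the baseline timestamp in epoch milliseconds; every latency
+            # below is measured relative to this kill time.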
+ time1 = time.time() * 1000.0
+
+ # Kill an ONOS node
+ main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
+ killresult = main.ONOSbench.onosKill( ip_address )
+ main.Cluster.runningNodes[ main.nodeNumToKill ].active = False
+
+ # Stop an ONOS node
+ main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
+ stopresult = main.ONOSbench.onosStop( ip_address )
+
+            killStopResult = killresult == main.TRUE and stopresult == main.TRUE
+
+ if not killStopResult:
+ main.log.error( "ONOS node was NOT successfully stopped and killed." )
+ criticalError = True
+
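+            # Give the surviving cluster time to detect the failure and
+            # re-elect a master before sampling the event log.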
+ time.sleep( main.failoverSleep )
+
+ # Stop tshark and get times
+ main.log.info( "Stopping tshark." )
+ main.ONOSbench.tsharkStop()
+
+ masterChangedLats = []
+ instanceDeactivatedLats = []
+
+ main.log.info( "Obtaining latencies from 'events' output." )
+ for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
+ eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
+ for line in reversed( eventOutput ):
+ if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
+ deactivateTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
+ instanceDeactivatedLats.append( deactivateTime - time1 )
+ elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
+ changedTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
+ masterChangedLats.append( changedTime - time1 )
+ if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
+ break
+
+            # Use the earliest INSTANCE_DEACTIVATED timestamp seen across the
+            # surviving nodes; guard against an empty list so a missing event
+            # cannot crash the iteration.
+            instanceDeactivatedLats.sort()
+            instanceDeactivated = instanceDeactivatedLats[ 0 ] if instanceDeactivatedLats else None
+
+            eventLatCheck = bool( masterChangedLats and instanceDeactivatedLats )
+ if not eventLatCheck:
+ main.log.warn( "Latencies were NOT obtained from 'events' successfully." )
+
+ main.log.info( "Obtain latency from tshark output." )
+ tsharkLatCheck = True
+ with open( main.tsharkResultPath, "r" ) as resultFile:
+ resultText = resultFile.readline()
+ main.log.info( "Capture result: " + resultText )
+ resultText = resultText.split()
+ if len( resultText ) > 1:
+ roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
+ resultFile.close()
+ else:
+ main.log.error( "Tshark output file is NOT as expected." )
+ tsharkLatCheck = False
+ if not tsharkLatCheck:
+ main.log.warn( "Latency was NOT obtained from tshark successfully." )
+
+            validDataCheck = False
+            if tsharkLatCheck and eventLatCheck:
+                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
+                main.log.info( "roleRequestLat - instanceDeactivated: " + str( roleRequestLat - instanceDeactivated ) )
+                if iteration >= main.warmUp:  # Don't record data during a warm-up
+                    main.log.info( "Verifying that the data are valid." )
+                    validDataCheck = roleRequestLat - instanceDeactivated >= 0 and \
+                                     instanceDeactivated >= 0
+                    if not validDataCheck:
+                        main.log.warn( "Data are NOT valid." )
+
+ if eventLatCheck and tsharkLatCheck and validDataCheck:
+ main.log.info( "Saving data..." )
+ main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
+ main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )
+
+
+ # Restart ONOS node
+ main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
+ startResult = main.ONOSbench.onosStart( ip_address )
+
+ if not startResult:
+ main.log.error( "ONOS nodes NOT successfully started." )
+ criticalError = True
+
+ # Check if ONOS is up yet
+ main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
+ upResult = main.ONOSbench.isup( ip_address )
+
+ if not upResult:
+ main.log.error( "ONOS did NOT successfully restart." )
+ criticalError = True
+
+ # Restart CLI
+ main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
+ cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
+ main.Cluster.runningNodes[ main.nodeNumToKill ] .active = True
+
+ if not cliResult:
+ main.log.error( "ONOS CLI did NOT successfully restart." )
+ criticalError = True
+
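+            # Sanity-check cluster membership with HA.nodesCheck, retrying up to
+            # 3 times with a 1 second sleep between attempts.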
+ main.log.info( "Checking ONOS nodes." )
+ nodeResults = utilities.retry( main.HA.nodesCheck,
+ False,
+ args=[ main.Cluster.active() ],
+ sleep=1,
+ attempts=3 )
+
+ if not nodeResults:
+ main.log.error( "Nodes check NOT successful." )
+ criticalError = True
+
+ main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
+ time.sleep( main.recoverySleep )
+
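+            # Only iterations after the warm-up phase count toward the failure
+            # ratio; abort the whole case once failPercent of the samples are bad
+            # or a critical error has occurred.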
+ if not ( mastershipCheck and
+ eventLatCheck and
+ tsharkLatCheck and
+ validDataCheck ) and \
+ iteration >= main.warmUp:
+ main.failCounter += 1
+ main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
+ if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
+ main.log.error( str( main.failPercent * 100 ) + "% or more of data is invalid, or a critical error has occurred." )
+ passingResult = False
+ break
+
+ utilities.assert_equals( expect=True, actual=passingResult,
+ onpass="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering was successful.",
+ onfail="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering FAILED. Stopping test.")
+ if not passingResult:
+ main.cleanAndExit()
+
+
+ def CASE3( self, main ):
+ """
+ Write results to database file.
+        Omit this case if you do not want to write to the database.
+ """
+ import numpy
+ result = { 'avg' : {}, 'stddev' : {} }
+
+ for i in main.latencyData:
+ result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
+ result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )
+
+ main.log.info( "result: " + str( result ) )
+ with open( main.dbFileName, "a" ) as dbFile:
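+            # Row format: numCtrls, test server, commit hash ( main.commit is
+            # expected to look like "commit <hash>" ), then the averaged and
+            # standard-deviation latencies in sorted key order.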
+ strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
+ strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
+            # Iterate in sorted key order so the column layout in the database
+            # file is deterministic across runs.
+            for i in sorted( result ):
+                for j in sorted( result[ i ] ):
+                    strToWrite += "," + str( result[ i ][ j ] )
+            strToWrite += "\n"
+            dbFile.write( strToWrite )