Created SCPFmastershipFailoverLat test
Change-Id: I9610a17a8d4f0ccb6a83b27679a9d20efed93f25
diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/README b/TestON/tests/SCPF/SCPFmastershipFailoverLat/README
new file mode 100644
index 0000000..f992efd
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/README
@@ -0,0 +1,2 @@
+The objective of SCPFmastershipFailoverLat is to test the latency for an ONOS node being killed, change of mastership, and role request as ONOS scales.
+For more details, check out the wiki documentation: https://wiki.onosproject.org/x/O4De
\ No newline at end of file
diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.params b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.params
new file mode 100644
index 0000000..1b4fc02
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.params
@@ -0,0 +1,71 @@
+<PARAMS>
+ <testcases>0,[1,2,3]*3</testcases>
+
+ <SCALE>3,5,7</SCALE>
+ <max>7</max>
+
+ <ENV>
+ <cellName>topo_perf_test</cellName>
+ <cellApps>drivers,events,openflow</cellApps>
+ </ENV>
+
+ <MN>
+ <ip1>OCN</ip1>
+ </MN>
+
+ <GIT>
+ <pull>False</pull>
+ <branch>master</branch>
+ </GIT>
+
+ <TSHARK>
+ <tsharkResultPath>/tmp/tshark_mastership_failover</tsharkResultPath>
+ <ofpRoleRequest>OF 1.3 90 of_role_request</ofpRoleRequest>
+ </TSHARK>
+
+ <SLEEP>
+ <recovery>5</recovery>
+ <failover>10</failover>
+ <mininet>5</mininet>
+ </SLEEP>
+
+ <TIMEOUT>
+ <timeout>60</timeout>
+ </TIMEOUT>
+
+ <TEST>
+ #'on' or 'off' debug mode.
+ #If on, logging will be more verbose and
+ #tshark pcap will be enabled
+ #pcap file located at /tmp/'capture_name'
+ <debug>True</debug>
+
+ #Number of iterations to ignore initially
+ <warmUp>5</warmUp>
+
+ #Number of times to iterate each case
+ <sampleSize>20</sampleSize>
+
+ # Percent of data that can be invalid per sample size iteration.
+ # Any more than this will cause the test to fail and stop. Must be a float between 0 and 1.
+ <failPercent>0.25</failPercent>
+
+ </TEST>
+
+ <DATABASE>
+ <dbName>/tmp/mastershipFailoverLatDB</dbName>
+ </DATABASE>
+
+ <SWITCH>
+ <id>of:0000000000000001</id>
+ </SWITCH>
+
+ <TOPOLOGY>
+ <switchCount>5</switchCount>
+ <type>linear</type>
+ </TOPOLOGY>
+
+ <KILL>
+ <nodeNum>0</nodeNum>
+ </KILL>
+</PARAMS>
diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py
new file mode 100644
index 0000000..ecbf5cd
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py
@@ -0,0 +1,314 @@
+"""
+Copyright 2017 Open Networking Foundation (ONF)
+
+Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
+the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
+or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
+
+ TestON is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ TestON is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with TestON. If not, see <http://www.gnu.org/licenses/>.
+"""
+
+class SCPFmastershipFailoverLat:
+ def __init__( self ):
+ self.default = ''
+
+ def CASE0( self, main ):
+ import os
+ import imp
+ '''
+ - GIT
+ - BUILDING ONOS
+ Pull specific ONOS branch, then Build ONOS ono ONOS Bench.
+ This step is usually skipped. Because in a Jenkins driven automated
+ test env. We want Jenkins jobs to pull&build for flexibility to handle
+ different versions of ONOS.
+ - Construct tests variables
+ '''
+ try:
+ from tests.dependencies.ONOSSetup import ONOSSetup
+ main.testSetUp = ONOSSetup()
+ except ImportError:
+ main.log.error( "ONOSSetup not found. exiting the test" )
+ main.exit()
+ main.testSetUp.envSetupDescription()
+ stepResult = main.FALSE
+ try:
+ main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
+ main.cellName = main.params[ 'ENV' ][ 'cellName' ]
+ main.apps = main.params[ 'ENV' ][ 'cellApps' ]
+ main.scale = ( main.params[ 'SCALE' ] ).split( "," )
+ main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
+ main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
+ main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
+ main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
+ main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
+ main.maxScale = int( main.params[ 'max' ] )
+ main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
+ main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
+ main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
+ main.debug = main.params[ 'TEST' ][ 'debug' ]
+ main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
+ main.switchID = main.params[ 'SWITCH' ][ 'id' ]
+ main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
+ main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
+ main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
+ main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )
+
+ if main.debug == "True":
+ main.debug = True
+ else:
+ main.debug = False
+
+ stepResult = main.testSetUp.envSetup()
+ main.log.info( "Create Database file " + main.dbFileName )
+ resultsDB = open( main.dbFileName, "w+" )
+ resultsDB.close()
+
+ except Exception as e:
+ main.testSetUp.envSetupException( e )
+ main.testSetUp.evnSetupConclusion( stepResult )
+
+
+
+ def CASE1( self, main ):
+ # Clean up test environment and set up
+ import time
+ main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
+ cellName=main.cellName, killRemoveMax=False )
+ try:
+ from tests.dependencies.utils import Utils
+ except ImportError:
+ main.log.error( "Utils not found exiting the test" )
+ main.exit()
+ try:
+ main.Utils
+ except ( NameError, AttributeError ):
+ main.Utils = Utils()
+ main.Utils.mininetCleanup( main.Mininet1 )
+
+ main.step( "Starting up Mininet from command." )
+
+ mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
+ for ctrl in main.Cluster.active():
+ mnCmd += " --controller remote,ip=" + ctrl.ipAddress
+
+ stepResult = main.Mininet1.startNet( mnCmd=mnCmd )
+
+ utilities.assert_equals( expect=main.TRUE,
+ actual=stepResult,
+ onpass="Mininet was set up correctly.",
+ onfail="Mininet was NOT set up correctly." )
+
+
+ def CASE2( self, main ):
+ """
+ Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
+ (tshark time), then bring the node back up.
+ """
+ import time
+ import datetime
+ import numpy
+ from tests.HA.dependencies.HA import HA
+
+ main.HA = HA()
+
+ main.latencyData = { 'kill_to_deactivation' : [],
+ 'deactivation_to_role_request' : [] }
+
+ main.failCounter = 0
+ passingResult = True
+ criticalError = False
+
+ main.step( "Gathering data starting with " + str( main.warmUp ) + " warm ups and a sample size of " + str( main.sampleSize ) )
+
+ for iteration in range( 0, main.sampleSize + main.warmUp ):
+
+ main.log.info( "==========================================" )
+ main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )
+
+ ip_address = main.Cluster.active( 0 ).ipAddress
+ strNodeNumToKill = str( main.nodeNumToKill )
+
+ main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
+ main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )
+
+ main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
+ time.sleep( main.recoverySleep )
+ mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address
+
+ if not mastershipCheck:
+ main.log.warn( "Mastership is NOT as expected." )
+
+ with open( main.tsharkResultPath, "w" ) as tshark:
+ tshark.write( "" )
+ main.log.info( "Starting tshark capture." )
+ main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
+ time1 = time.time() * 1000.0
+
+ # Kill an ONOS node
+ main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
+ killresult = main.ONOSbench.onosKill( ip_address )
+ main.Cluster.runningNodes[ main.nodeNumToKill ].active = False
+
+ # Stop an ONOS node
+ main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
+ stopresult = main.ONOSbench.onosStop( ip_address )
+
+ killStopResult = stopresult == killresult and True
+
+ if not killStopResult:
+ main.log.error( "ONOS node was NOT successfully stopped and killed." )
+ criticalError = True
+
+ time.sleep( main.failoverSleep )
+
+ # Stop tshark and get times
+ main.log.info( "Stopping tshark." )
+ main.ONOSbench.tsharkStop()
+
+ masterChangedLats = []
+ instanceDeactivatedLats = []
+
+ main.log.info( "Obtaining latencies from 'events' output." )
+ for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
+ eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
+ for line in reversed( eventOutput ):
+ if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
+ deactivateTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
+ instanceDeactivatedLats.append( deactivateTime - time1 )
+ elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
+ changedTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
+ masterChangedLats.append( changedTime - time1 )
+ if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
+ break
+
+ instanceDeactivatedLats.sort()
+ instanceDeactivated = instanceDeactivatedLats[ 0 ]
+
+ eventLatCheck = True if masterChangedLats and instanceDeactivated else False
+ if not eventLatCheck:
+ main.log.warn( "Latencies were NOT obtained from 'events' successfully." )
+
+ main.log.info( "Obtain latency from tshark output." )
+ tsharkLatCheck = True
+ with open( main.tsharkResultPath, "r" ) as resultFile:
+ resultText = resultFile.readline()
+ main.log.info( "Capture result: " + resultText )
+ resultText = resultText.split()
+ if len( resultText ) > 1:
+ roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
+ resultFile.close()
+ else:
+ main.log.error( "Tshark output file is NOT as expected." )
+ tsharkLatCheck = False
+ if not tsharkLatCheck:
+ main.log.warn( "Latency was NOT obtained from tshark successfully." )
+
+ validDataCheck = False
+ if tsharkLatCheck:
+ main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
+ main.log.info( "roleRequestLat - instanceDeactivated: " + str( roleRequestLat - instanceDeactivated ) )
+ if iteration >= main.warmUp:
+ main.log.info( "Verifying that the data are valid." ) # Don't record data during a warm-up
+ validDataCheck = roleRequestLat - instanceDeactivated >= 0 and \
+ instanceDeactivated >= 0
+ if not validDataCheck:
+ main.log.warn( "Data are NOT valid." )
+
+ if eventLatCheck and tsharkLatCheck and validDataCheck:
+ main.log.info( "Saving data..." )
+ main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
+ main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )
+
+
+ # Restart ONOS node
+ main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
+ startResult = main.ONOSbench.onosStart( ip_address )
+
+ if not startResult:
+ main.log.error( "ONOS nodes NOT successfully started." )
+ criticalError = True
+
+ # Check if ONOS is up yet
+ main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
+ upResult = main.ONOSbench.isup( ip_address )
+
+ if not upResult:
+ main.log.error( "ONOS did NOT successfully restart." )
+ criticalError = True
+
+ # Restart CLI
+ main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
+ cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
+ main.Cluster.runningNodes[ main.nodeNumToKill ] .active = True
+
+ if not cliResult:
+ main.log.error( "ONOS CLI did NOT successfully restart." )
+ criticalError = True
+
+ main.log.info( "Checking ONOS nodes." )
+ nodeResults = utilities.retry( main.HA.nodesCheck,
+ False,
+ args=[ main.Cluster.active() ],
+ sleep=1,
+ attempts=3 )
+
+ if not nodeResults:
+ main.log.error( "Nodes check NOT successful." )
+ criticalError = True
+
+ main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
+ time.sleep( main.recoverySleep )
+
+ if not ( mastershipCheck and
+ eventLatCheck and
+ tsharkLatCheck and
+ validDataCheck ) and \
+ iteration >= main.warmUp:
+ main.failCounter += 1
+ main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
+ if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
+ main.log.error( str( main.failPercent * 100 ) + "% or more of data is invalid, or a critical error has occurred." )
+ passingResult = False
+ break
+
+ utilities.assert_equals( expect=True, actual=passingResult,
+ onpass="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering was successful.",
+ onfail="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering FAILED. Stopping test.")
+ if not passingResult:
+ main.cleanAndExit()
+
+
+ def CASE3( self, main ):
+ """
+ Write results to database file.
+ Omit this case if you don't want to write to database.
+ """
+ import numpy
+ result = { 'avg' : {}, 'stddev' : {} }
+
+ for i in main.latencyData:
+ result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
+ result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )
+
+ main.log.info( "result: " + str( result ) )
+ with open( main.dbFileName, "a" ) as dbFile:
+ strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
+ strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
+ for i in result:
+ for j in result[ i ]:
+ strToWrite += "," + str( result[ i ][ j ] )
+ strToWrite += "\n"
+ dbFile.write( strToWrite )
+ dbFile.close()
diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.topo b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.topo
new file mode 100644
index 0000000..72c1715
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.topo
@@ -0,0 +1,42 @@
+<TOPOLOGY>
+
+ <COMPONENT>
+
+ <ONOScell>
+ <host>localhost</host> # ONOS "bench" machine
+ <user>sdn</user>
+ <password>rocks</password>
+ <type>OnosClusterDriver</type>
+ <connect_order>1</connect_order>
+ <COMPONENTS>
+ <cluster_name></cluster_name> # Used as a prefix for cluster components. Defaults to 'ONOS'
+ <diff_clihost></diff_clihost> # if it has different host other than localhost for CLI. True or empty. OC# will be used if True.
+ <karaf_username></karaf_username>
+ <karaf_password></karaf_password>
+ <web_user></web_user>
+ <web_pass></web_pass>
+ <rest_port></rest_port>
+ <prompt></prompt>
+ <onos_home></onos_home> # defines where onos home is
+ <nodes> 7 </nodes> # number of nodes in the cluster
+ </COMPONENTS>
+ </ONOScell>
+
+ <Mininet1>
+ <host>OCN</host>
+ <user>sdn</user>
+ <password>rocks</password>
+ <type>MininetCliDriver</type>
+ <connect_order>2</connect_order>
+ <COMPONENTS>
+ <arg1> --custom ~/mininet/custom/topo-perf-2sw.py </arg1>
+ <arg2> --topo mytopo</arg2>
+ <arg3> --switch ovsk,protocols=OpenFlow13</arg3>
+ <controller> remote </controller>
+ <prompt></prompt>
+ </COMPONENTS>
+ </Mininet1>
+
+ </COMPONENT>
+
+</TOPOLOGY>
diff --git a/TestON/tests/SCPF/SCPFmastershipFailoverLat/__init__.py b/TestON/tests/SCPF/SCPFmastershipFailoverLat/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/SCPF/SCPFmastershipFailoverLat/__init__.py