Blame - TestON/tests/SCPF/SCPFmastershipFailoverLat/SCPFmastershipFailoverLat.py - OnosSystemTest

blob: ecbf5cda67ca158cbb91fa47238ab2199abba632 [file] [log] [blame]

Jeremy Ronquillo	818bc7c	2017-08-09 17:14:53 +0000	[diff] [blame]	1	"""
				2	Copyright 2017 Open Networking Foundation (ONF)
				3
				4	Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
				5	the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
				6	or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
				7
				8	TestON is free software: you can redistribute it and/or modify
				9	it under the terms of the GNU General Public License as published by
				10	the Free Software Foundation, either version 2 of the License, or
				11	(at your option) any later version.
				12
				13	TestON is distributed in the hope that it will be useful,
				14	but WITHOUT ANY WARRANTY; without even the implied warranty of
				15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	GNU General Public License for more details.
				17
				18	You should have received a copy of the GNU General Public License
				19	along with TestON. If not, see <http://www.gnu.org/licenses/>.
				20	"""
				21
				22	class SCPFmastershipFailoverLat:
				23	def __init__( self ):
				24	self.default = ''
				25
				26	def CASE0( self, main ):
				27	import os
				28	import imp
				29	'''
				30	- GIT
				31	- BUILDING ONOS
				32	Pull specific ONOS branch, then Build ONOS ono ONOS Bench.
				33	This step is usually skipped. Because in a Jenkins driven automated
				34	test env. We want Jenkins jobs to pull&build for flexibility to handle
				35	different versions of ONOS.
				36	- Construct tests variables
				37	'''
				38	try:
				39	from tests.dependencies.ONOSSetup import ONOSSetup
				40	main.testSetUp = ONOSSetup()
				41	except ImportError:
				42	main.log.error( "ONOSSetup not found. exiting the test" )
				43	main.exit()
				44	main.testSetUp.envSetupDescription()
				45	stepResult = main.FALSE
				46	try:
				47	main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
				48	main.cellName = main.params[ 'ENV' ][ 'cellName' ]
				49	main.apps = main.params[ 'ENV' ][ 'cellApps' ]
				50	main.scale = ( main.params[ 'SCALE' ] ).split( "," )
				51	main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
				52	main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
				53	main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
				54	main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
				55	main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
				56	main.maxScale = int( main.params[ 'max' ] )
				57	main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
				58	main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
				59	main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
				60	main.debug = main.params[ 'TEST' ][ 'debug' ]
				61	main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
				62	main.switchID = main.params[ 'SWITCH' ][ 'id' ]
				63	main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
				64	main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
				65	main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
				66	main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )
				67
				68	if main.debug == "True":
				69	main.debug = True
				70	else:
				71	main.debug = False
				72
				73	stepResult = main.testSetUp.envSetup()
				74	main.log.info( "Create Database file " + main.dbFileName )
				75	resultsDB = open( main.dbFileName, "w+" )
				76	resultsDB.close()
				77
				78	except Exception as e:
				79	main.testSetUp.envSetupException( e )
				80	main.testSetUp.evnSetupConclusion( stepResult )
				81
				82
				83
				84	def CASE1( self, main ):
				85	# Clean up test environment and set up
				86	import time
				87	main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
				88	cellName=main.cellName, killRemoveMax=False )
				89	try:
				90	from tests.dependencies.utils import Utils
				91	except ImportError:
				92	main.log.error( "Utils not found exiting the test" )
				93	main.exit()
				94	try:
				95	main.Utils
				96	except ( NameError, AttributeError ):
				97	main.Utils = Utils()
				98	main.Utils.mininetCleanup( main.Mininet1 )
				99
				100	main.step( "Starting up Mininet from command." )
				101
				102	mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
				103	for ctrl in main.Cluster.active():
				104	mnCmd += " --controller remote,ip=" + ctrl.ipAddress
				105
				106	stepResult = main.Mininet1.startNet( mnCmd=mnCmd )
				107
				108	utilities.assert_equals( expect=main.TRUE,
				109	actual=stepResult,
				110	onpass="Mininet was set up correctly.",
				111	onfail="Mininet was NOT set up correctly." )
				112
				113
				114	def CASE2( self, main ):
				115	"""
				116	Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
				117	(tshark time), then bring the node back up.
				118	"""
				119	import time
				120	import datetime
				121	import numpy
				122	from tests.HA.dependencies.HA import HA
				123
				124	main.HA = HA()
				125
				126	main.latencyData = { 'kill_to_deactivation' : [],
				127	'deactivation_to_role_request' : [] }
				128
				129	main.failCounter = 0
				130	passingResult = True
				131	criticalError = False
				132
				133	main.step( "Gathering data starting with " + str( main.warmUp ) + " warm ups and a sample size of " + str( main.sampleSize ) )
				134
				135	for iteration in range( 0, main.sampleSize + main.warmUp ):
				136
				137	main.log.info( "==========================================" )
				138	main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )
				139
				140	ip_address = main.Cluster.active( 0 ).ipAddress
				141	strNodeNumToKill = str( main.nodeNumToKill )
				142
				143	main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
				144	main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )
				145
				146	main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
				147	time.sleep( main.recoverySleep )
				148	mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address
				149
				150	if not mastershipCheck:
				151	main.log.warn( "Mastership is NOT as expected." )
				152
				153	with open( main.tsharkResultPath, "w" ) as tshark:
				154	tshark.write( "" )
				155	main.log.info( "Starting tshark capture." )
				156	main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
				157	time1 = time.time() * 1000.0
				158
				159	# Kill an ONOS node
				160	main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
				161	killresult = main.ONOSbench.onosKill( ip_address )
				162	main.Cluster.runningNodes[ main.nodeNumToKill ].active = False
				163
				164	# Stop an ONOS node
				165	main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
				166	stopresult = main.ONOSbench.onosStop( ip_address )
				167
				168	killStopResult = stopresult == killresult and True
				169
				170	if not killStopResult:
				171	main.log.error( "ONOS node was NOT successfully stopped and killed." )
				172	criticalError = True
				173
				174	time.sleep( main.failoverSleep )
				175
				176	# Stop tshark and get times
				177	main.log.info( "Stopping tshark." )
				178	main.ONOSbench.tsharkStop()
				179
				180	masterChangedLats = []
				181	instanceDeactivatedLats = []
				182
				183	main.log.info( "Obtaining latencies from 'events' output." )
				184	for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
				185	eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
				186	for line in reversed( eventOutput ):
				187	if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
				188	deactivateTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
				189	instanceDeactivatedLats.append( deactivateTime - time1 )
				190	elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
				191	changedTime = float( datetime.datetime.strptime( line.split()[ 0 ], "%Y-%m-%dT%H:%M:%S.%f" ).strftime( '%s.%f' ) ) * 1000.0
				192	masterChangedLats.append( changedTime - time1 )
				193	if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
				194	break
				195
				196	instanceDeactivatedLats.sort()
				197	instanceDeactivated = instanceDeactivatedLats[ 0 ]
				198
				199	eventLatCheck = True if masterChangedLats and instanceDeactivated else False
				200	if not eventLatCheck:
				201	main.log.warn( "Latencies were NOT obtained from 'events' successfully." )
				202
				203	main.log.info( "Obtain latency from tshark output." )
				204	tsharkLatCheck = True
				205	with open( main.tsharkResultPath, "r" ) as resultFile:
				206	resultText = resultFile.readline()
				207	main.log.info( "Capture result: " + resultText )
				208	resultText = resultText.split()
				209	if len( resultText ) > 1:
				210	roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
				211	resultFile.close()
				212	else:
				213	main.log.error( "Tshark output file is NOT as expected." )
				214	tsharkLatCheck = False
				215	if not tsharkLatCheck:
				216	main.log.warn( "Latency was NOT obtained from tshark successfully." )
				217
				218	validDataCheck = False
				219	if tsharkLatCheck:
				220	main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
				221	main.log.info( "roleRequestLat - instanceDeactivated: " + str( roleRequestLat - instanceDeactivated ) )
				222	if iteration >= main.warmUp:
				223	main.log.info( "Verifying that the data are valid." ) # Don't record data during a warm-up
				224	validDataCheck = roleRequestLat - instanceDeactivated >= 0 and \
				225	instanceDeactivated >= 0
				226	if not validDataCheck:
				227	main.log.warn( "Data are NOT valid." )
				228
				229	if eventLatCheck and tsharkLatCheck and validDataCheck:
				230	main.log.info( "Saving data..." )
				231	main.latencyData[ 'kill_to_deactivation' ].append( instanceDeactivated )
				232	main.latencyData[ 'deactivation_to_role_request' ].append( roleRequestLat - instanceDeactivated )
				233
				234
				235	# Restart ONOS node
				236	main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
				237	startResult = main.ONOSbench.onosStart( ip_address )
				238
				239	if not startResult:
				240	main.log.error( "ONOS nodes NOT successfully started." )
				241	criticalError = True
				242
				243	# Check if ONOS is up yet
				244	main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
				245	upResult = main.ONOSbench.isup( ip_address )
				246
				247	if not upResult:
				248	main.log.error( "ONOS did NOT successfully restart." )
				249	criticalError = True
				250
				251	# Restart CLI
				252	main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
				253	cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
				254	main.Cluster.runningNodes[ main.nodeNumToKill ] .active = True
				255
				256	if not cliResult:
				257	main.log.error( "ONOS CLI did NOT successfully restart." )
				258	criticalError = True
				259
				260	main.log.info( "Checking ONOS nodes." )
				261	nodeResults = utilities.retry( main.HA.nodesCheck,
				262	False,
				263	args=[ main.Cluster.active() ],
				264	sleep=1,
				265	attempts=3 )
				266
				267	if not nodeResults:
				268	main.log.error( "Nodes check NOT successful." )
				269	criticalError = True
				270
				271	main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
				272	time.sleep( main.recoverySleep )
				273
				274	if not ( mastershipCheck and
				275	eventLatCheck and
				276	tsharkLatCheck and
				277	validDataCheck ) and \
				278	iteration >= main.warmUp:
				279	main.failCounter += 1
				280	main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
				281	if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
				282	main.log.error( str( main.failPercent * 100 ) + "% or more of data is invalid, or a critical error has occurred." )
				283	passingResult = False
				284	break
				285
				286	utilities.assert_equals( expect=True, actual=passingResult,
				287	onpass="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering was successful.",
				288	onfail="Node scaling " + str( main.Cluster.numCtrls ) + " data gathering FAILED. Stopping test.")
				289	if not passingResult:
				290	main.cleanAndExit()
				291
				292
				293	def CASE3( self, main ):
				294	"""
				295	Write results to database file.
				296	Omit this case if you don't want to write to database.
				297	"""
				298	import numpy
				299	result = { 'avg' : {}, 'stddev' : {} }
				300
				301	for i in main.latencyData:
				302	result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
				303	result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )
				304
				305	main.log.info( "result: " + str( result ) )
				306	with open( main.dbFileName, "a" ) as dbFile:
				307	strToWrite = str( main.Cluster.numCtrls ) + ",'baremetal1'"
				308	strToWrite += ",'" + main.commit.split()[ 1 ] + "'"
				309	for i in result:
				310	for j in result[ i ]:
				311	strToWrite += "," + str( result[ i ][ j ] )
				312	strToWrite += "\n"
				313	dbFile.write( strToWrite )
				314	dbFile.close()