# Source blob: c53484195237f531cdee942aac8f76eef208720b
"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
class SCPFmastershipFailoverLat:
    """
    Mastership failover latency test.

    CASE0 reads the test parameters, CASE1 brings up ONOS and Mininet,
    CASE2 repeatedly kills one ONOS node and records how long the cluster
    takes to notice it and to send an OpenFlow role request, and CASE3
    appends the averaged results to the database file.

    The case bodies are kept flat on purpose: no helper methods, no
    `return`, no use of `self`, and every import stays inside its case.
    """

    def __init__( self ):
        """
        Only sets the `default` attribute to an empty string.
        """
        self.default = ''

    def CASE0( self, main ):
        """
        - GIT
        - BUILDING ONOS
            Pull specific ONOS branch, then Build ONOS on ONOS Bench.
            This step is usually skipped. Because in a Jenkins driven automated
            test env. We want Jenkins jobs to pull&build for flexibility to handle
            different versions of ONOS.
        - Construct tests variables
        """
        # NOTE(review): os and imp are not referenced in this case. They are
        # kept in case the harness or a later case relies on them - confirm
        # before removing.
        import os
        import imp
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        stepResult = main.FALSE
        try:
            # Copy every setting the later cases need from the params file onto main
            main.MN1Ip = main.params[ 'MN' ][ 'ip1' ]
            main.cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'cellApps' ]
            main.scale = ( main.params[ 'SCALE' ] ).split( "," )
            main.ofpRoleRequest = main.params[ 'TSHARK' ][ 'ofpRoleRequest' ]
            main.tsharkResultPath = main.params[ 'TSHARK' ][ 'tsharkResultPath' ]
            main.sampleSize = int( main.params[ 'TEST' ][ 'sampleSize' ] )
            main.warmUp = int( main.params[ 'TEST' ][ 'warmUp' ] )
            main.dbFileName = main.params[ 'DATABASE' ][ 'dbName' ]
            main.maxScale = int( main.params[ 'max' ] )
            main.timeout = int( main.params[ 'TIMEOUT' ][ 'timeout' ] )
            main.MNSleep = int( main.params[ 'SLEEP' ][ 'mininet' ] )
            main.recoverySleep = int( main.params[ 'SLEEP' ][ 'recovery' ] )
            main.debug = main.params[ 'TEST' ][ 'debug' ]
            main.failoverSleep = int( main.params[ 'SLEEP' ][ 'failover' ] )
            main.switchID = main.params[ 'SWITCH' ][ 'id' ]
            main.topologySwitchCount = main.params[ 'TOPOLOGY' ][ 'switchCount' ]
            main.topologyType = main.params[ 'TOPOLOGY' ][ 'type' ]
            main.nodeNumToKill = int( main.params[ 'KILL' ][ 'nodeNum' ] )
            main.failPercent = float( main.params[ 'TEST' ][ 'failPercent' ] )

            # The parameter arrives as text; only the exact string "True" enables debug
            main.debug = main.debug == "True"

            stepResult = main.testSetUp.envSetup()
            main.log.info( "Create Database file " + main.dbFileName )
            # "w+" creates the file or empties an existing one; CASE3 appends to it
            with open( main.dbFileName, "w+" ):
                pass

        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE(review): "evn" is the spelling used by the original call; confirm
        # it matches the method name in ONOSSetup before "correcting" it.
        main.testSetUp.evnSetupConclusion( stepResult )

    def CASE1( self, main ):
        """
        Clean up the test environment, set up ONOS, and start Mininet with
        every active controller passed as a remote controller.
        """
        import time
        main.testSetUp.ONOSSetUp( main.Mininet1, main.Cluster, True,
                                  cellName=main.cellName, killRemoveMax=False )
        try:
            from tests.dependencies.utils import Utils
        except ImportError:
            main.log.error( "Utils not found exiting the test" )
            main.exit()
        # Create main.Utils only if an earlier case has not already done so
        try:
            main.Utils
        except ( NameError, AttributeError ):
            main.Utils = Utils()
        main.Utils.mininetCleanup( main.Mininet1 )

        main.step( "Starting up Mininet from command." )

        mnCmd = " mn " + " --topo " + main.topologyType + "," + main.topologySwitchCount
        for ctrl in main.Cluster.active():
            mnCmd += " --controller remote,ip=" + ctrl.ipAddress

        stepResult = main.Mininet1.startNet( mnCmd=mnCmd )

        utilities.assert_equals( expect=main.TRUE,
                                 actual=stepResult,
                                 onpass="Mininet was set up correctly.",
                                 onfail="Mininet was NOT set up correctly." )

    def CASE2( self, main ):
        """
        Kill ONOS node, and measure the latency for INSTANCE_DEACTIVATED, MASTER_CHANGED, and role request
        ( tshark time ), then bring the node back up.

        Runs main.warmUp + main.sampleSize iterations. Only iterations after
        the warm-ups are validated and stored in main.latencyData:
            'kill_to_deactivation'          ms from the kill to the earliest
                                            INSTANCE_DEACTIVATED event
            'deactivation_to_role_request'  ms from that event to the captured
                                            role request
        The case stops the test when the share of failed iterations reaches
        main.failPercent or when a critical error ( kill, stop, restart or
        node check failure ) occurs.
        """
        import time
        import datetime
        import numpy
        from tests.HA.dependencies.HA import HA

        main.HA = HA()

        main.latencyData = { 'kill_to_deactivation': [],
                             'deactivation_to_role_request': [] }

        main.failCounter = 0
        passingResult = True
        criticalError = False

        main.step( "Gathering data starting with "
                   + str( main.warmUp )
                   + " warm ups and a sample size of "
                   + str( main.sampleSize ) )

        for iteration in range( 0, main.sampleSize + main.warmUp ):

            main.log.info( "==========================================" )
            main.log.info( "================iteration:{}==============".format( str( iteration + 1 ) ) )

            # NOTE(review): the node that gets mastership and is killed is the
            # first active node, while the log text and the active flag below
            # use main.nodeNumToKill. These agree only if that is node 0 - confirm.
            ip_address = main.Cluster.active( 0 ).ipAddress
            strNodeNumToKill = str( main.nodeNumToKill )

            main.log.info( "Assigning mastership to ONOS node " + strNodeNumToKill )
            main.Cluster.active( 0 ).CLI.deviceRole( main.switchID, ip_address )

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )
            mastershipCheck = main.Cluster.active( 0 ).CLI.getMaster( main.switchID ) == ip_address

            if not mastershipCheck:
                main.log.warn( "Mastership is NOT as expected." )

            # Empty the capture file so a line from a previous iteration is never read back
            with open( main.tsharkResultPath, "w" ) as tshark:
                tshark.write( "" )
            main.log.info( "Starting tshark capture." )
            main.ONOSbench.tsharkGrep( main.ofpRoleRequest, main.tsharkResultPath )
            # Reference time in milliseconds; every latency below is relative to it
            time1 = time.time() * 1000.0

            # Kill an ONOS node
            main.log.info( "Killing ONOS node " + strNodeNumToKill + "." )
            killresult = main.ONOSbench.onosKill( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = False

            # Stop an ONOS node
            main.log.info( "Stopping ONOS node " + strNodeNumToKill + "." )
            stopresult = main.ONOSbench.onosStop( ip_address )

            # Both calls must succeed. The previous "stopresult == killresult"
            # comparison also passed when both of them had failed.
            killStopResult = bool( killresult ) and bool( stopresult )

            if not killStopResult:
                main.log.error( "ONOS node was NOT successfully stopped and killed." )
                criticalError = True

            time.sleep( main.failoverSleep )

            # Stop tshark and get times
            main.log.info( "Stopping tshark." )
            main.ONOSbench.tsharkStop()

            masterChangedLats = []
            instanceDeactivatedLats = []
            eventTimeFormat = "%Y-%m-%dT%H:%M:%S.%f"

            main.log.info( "Obtaining latencies from 'events' output." )
            for CLInum in range( 0, main.Cluster.numCtrls - 1 ):
                eventOutput = main.Cluster.active( CLInum ).CLI.events( args='-a' ).split( "\r\n" )
                # Walk from the newest event backwards; each list takes at most
                # one entry per node ( its length must still equal CLInum )
                for line in reversed( eventOutput ):
                    targetLats = None
                    if "INSTANCE_DEACTIVATED" in line and len( instanceDeactivatedLats ) == CLInum:
                        targetLats = instanceDeactivatedLats
                    elif "MASTER_CHANGED" in line and len( masterChangedLats ) == CLInum:
                        targetLats = masterChangedLats
                    if targetLats is not None:
                        # The first 23 characters of the first field hold the timestamp.
                        # mktime interprets it as local time, as the previous
                        # platform-specific strftime( '%s' ) conversion did.
                        eventStamp = datetime.datetime.strptime( line.split()[ 0 ][ : 23 ], eventTimeFormat )
                        eventTimeMs = ( time.mktime( eventStamp.timetuple() )
                                        + eventStamp.microsecond / 1e6 ) * 1000.0
                        targetLats.append( eventTimeMs - time1 )
                    if len( instanceDeactivatedLats ) > CLInum and len( masterChangedLats ) > CLInum:
                        break

            # Use the earliest deactivation; None when no event was found, so the
            # check below reports the problem instead of an IndexError
            instanceDeactivated = min( instanceDeactivatedLats ) if instanceDeactivatedLats else None

            eventLatCheck = bool( masterChangedLats and instanceDeactivated )
            if not eventLatCheck:
                main.log.warn( "Latencies were NOT obtained from 'events' successfully." )

            main.log.info( "Obtain latency from tshark output." )
            tsharkLatCheck = True
            with open( main.tsharkResultPath, "r" ) as resultFile:
                resultText = resultFile.readline()
            main.log.info( "Capture result: " + resultText )
            resultText = resultText.split()
            roleRequestLat = None
            if len( resultText ) > 1:
                try:
                    # The second field is read as a time in seconds
                    roleRequestLat = int( float( resultText[ 1 ] ) * 1000.0 ) - time1
                except ValueError:
                    tsharkLatCheck = False
            else:
                tsharkLatCheck = False
            if not tsharkLatCheck:
                main.log.error( "Tshark output file is NOT as expected." )
                main.log.warn( "Latency was NOT obtained from tshark successfully." )

            validDataCheck = False
            if tsharkLatCheck and instanceDeactivated is not None:
                deactivationToRoleRequest = roleRequestLat - instanceDeactivated
                main.log.info( "instanceDeactivated: " + str( instanceDeactivated ) )
                main.log.info( "roleRequestLat - instanceDeactivated: " + str( deactivationToRoleRequest ) )
                if iteration >= main.warmUp:
                    main.log.info( "Verifying that the data are valid." )  # Don't record data during a warm-up
                    # Negative values mean an event predates the kill
                    validDataCheck = deactivationToRoleRequest >= 0 and \
                        instanceDeactivated >= 0
                    if not validDataCheck:
                        main.log.warn( "Data are NOT valid." )

            if eventLatCheck and tsharkLatCheck and validDataCheck:
                main.log.info( "Saving data..." )
                main.latencyData[ 'kill_to_deactivation' ]\
                    .append( instanceDeactivated )
                main.latencyData[ 'deactivation_to_role_request' ]\
                    .append( roleRequestLat - instanceDeactivated )

            # Restart ONOS node
            main.log.info( "Restart ONOS node " + strNodeNumToKill + " and checking status of restart." )
            startResult = main.ONOSbench.onosStart( ip_address )

            if not startResult:
                main.log.error( "ONOS nodes NOT successfully started." )
                criticalError = True

            # Check if ONOS is up yet
            main.log.info( "Checking if ONOS node " + strNodeNumToKill + " is up." )
            upResult = main.ONOSbench.isup( ip_address )

            if not upResult:
                main.log.error( "ONOS did NOT successfully restart." )
                criticalError = True

            # Restart CLI
            # NOTE(review): the killed node is still flagged inactive here, so
            # active( main.nodeNumToKill ) may select a different node's CLI
            # than the one that was killed - confirm against Cluster.active().
            main.log.info( "Restarting ONOS node " + strNodeNumToKill + "'s main.CLI." )
            cliResult = main.Cluster.active( main.nodeNumToKill ).CLI.startOnosCli( ip_address )
            main.Cluster.runningNodes[ main.nodeNumToKill ].active = True

            if not cliResult:
                main.log.error( "ONOS CLI did NOT successfully restart." )
                criticalError = True

            main.log.info( "Checking ONOS nodes." )
            nodeResults = utilities.retry( main.HA.nodesCheck,
                                           False,
                                           args=[ main.Cluster.active() ],
                                           sleep=1,
                                           attempts=3 )

            if not nodeResults:
                main.log.error( "Nodes check NOT successful." )
                criticalError = True

            main.log.info( "Sleeping for " + str( main.recoverySleep ) + " seconds..." )
            time.sleep( main.recoverySleep )

            # Warm-up iterations never count as failures
            if not ( mastershipCheck and
                     eventLatCheck and
                     tsharkLatCheck and
                     validDataCheck ) and \
                    iteration >= main.warmUp:
                main.failCounter += 1
                main.log.warn( "Iteration failed. Failure count: " + str( main.failCounter ) )
            if float( main.failCounter ) / float( main.sampleSize ) >= main.failPercent or criticalError:
                main.log.error( str( main.failPercent * 100 )
                                + "% or more of data is invalid, or a critical error has occurred." )
                passingResult = False
                break

        utilities.assert_equals( expect=True, actual=passingResult,
                                 onpass="Node scaling "
                                        + str( main.Cluster.numCtrls )
                                        + " data gathering was successful.",
                                 onfail="Node scaling "
                                        + str( main.Cluster.numCtrls )
                                        + " data gathering FAILED. Stopping test." )
        if not passingResult:
            main.cleanAndExit()

    def CASE3( self, main ):
        """
        Write results to database file.
        Omit this case if you don't want to write to database.

        Appends one comma-separated line: controller count, 'baremetal1',
        the second word of main.commit, then the average and standard
        deviation of every series in main.latencyData.
        """
        import numpy
        result = { 'avg': {}, 'stddev': {} }

        for i in main.latencyData:
            result[ 'avg' ][ i ] = numpy.average( main.latencyData[ i ] )
            result[ 'stddev' ][ i ] = numpy.std( main.latencyData[ i ] )

        main.log.info( "result: " + str( result ) )
        # Column order follows the dictionaries' own iteration order, unchanged
        # from before so existing consumers of the file keep working
        fields = [ str( main.Cluster.numCtrls ),
                   "'baremetal1'",
                   "'" + main.commit.split()[ 1 ] + "'" ]
        for i in result:
            for j in result[ i ]:
                fields.append( str( result[ i ][ j ] ) )
        with open( main.dbFileName, "a" ) as dbFile:
            dbFile.write( ",".join( fields ) + "\n" )