"""
Copyright 2015 Open Networking Foundation (ONF)

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""

"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE102: Set up Spine-Leaf fabric topology in Mininet
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE104: Ping between all hosts
CASE5: Reading state of ONOS
CASE60: Initialize the upgrade.
CASE61: Upgrade a minority of nodes PHASE 1
CASE62: Transfer to new version. PHASE 2
CASE63: Rollback the upgrade
CASE64: Reset the upgrade state.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAupgradeRollback:
    """
    Test cases for upgrading a minority of ONOS nodes and rolling it back.

    Every CASE method takes the TestON `main` object. Most cases simply
    delegate to the helper object stored in `main.HA` by CASE1; the upgrade
    cases ( CASE60 - CASE64 ) and the leadership check in CASE7 are
    implemented in this class.

    NOTE(review): no CASE body touches `self`, and `utilities` is used as a
    global that this file never imports. Presumably the TestON runner
    provides `utilities` and executes the case bodies itself, so keep each
    CASE body self-contained ( no `self.` helpers, no `return` ) until that
    is confirmed.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Default to a failed result so the conclusion call below never sees
        # an unbound name when the setup raises before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE(review): the "evn" spelling is kept as-is; presumably it matches
        #               the method name defined by ONOSSetup - confirm before renaming.
        main.testSetUp.evnSetupConclusion( stepResult )

        # Extra functions ( and their arguments ) applied during ONOS set up
        applyFuncs = [ main.HA.copyBackupConfig ]
        applyArgs = [ None ]
        # Mininet is only started here when no topology file is configured
        try:
            externalTopo = bool( main.params[ 'topology' ][ 'topoFile' ] )
        except ( KeyError, IndexError ):
            externalTopo = False
        if externalTopo:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )

        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        # Named so that it does not shadow the standard `logging` module
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE60( self, main ):
        """
        Initialize the upgrade.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Initialize upgrade" )
        main.HA.upgradeInit( main )

    def CASE61( self, main ):
        """
        Upgrade a minority of nodes PHASE 1

        Stores the nodes selected for the upgrade in main.kill, asks main.HA
        to upgrade them, then verifies the cluster. The test is stopped if
        the nodes check fails.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Upgrade minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps this an int, so range() also works when "/"
        # is true division
        p = n // 2  # Number of nodes in the minority
        # ONOS nodes to upgrade: the first p entries of the running nodes
        main.kill = [ main.Cluster.runningNodes[ i ] for i in range( p ) ]
        main.HA.upgradeNodes( main )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )
        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump every component that is not ACTIVE before giving up
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        Transfer to new version. PHASE 2

        Sends the upgrade command, logs the upgrade status and checks the
        nodes. Unlike CASE61 and CASE63, a failed nodes check does not stop
        the test here.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Start the upgrade" )

        main.step( "Send the command to switch to new version" )
        ctrl = main.Cluster.next().CLI
        upgraded = ctrl.issuUpgrade()
        utilities.assert_equals( expect=main.TRUE, actual=upgraded,
                                 onpass="Cluster has moved to the upgraded nodes",
                                 onfail="Error transitioning to the upgraded nodes" )

        main.step( "Check the status of the upgrade" )
        ctrl = main.Cluster.next().CLI
        status = ctrl.issu()
        main.log.debug( status )
        # TODO: check things here?

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )
        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

    def CASE63( self, main ):
        """
        Rollback the upgrade

        Sends the rollback command, logs the upgrade status, reinstalls and
        restarts ONOS on every node listed in main.kill, then verifies the
        cluster. The test is stopped if the nodes check fails.
        """
        main.case( "Rollback the upgrade" )
        main.step( "Rollbak the upgrade" )
        # send rollback command
        ctrl = main.Cluster.next().CLI
        rollback = ctrl.issuRollback()
        utilities.assert_equals( expect=main.TRUE, actual=rollback,
                                 onpass="Upgrade has been rolled back",
                                 onfail="Error rolling back the upgrade" )

        main.step( "Check the status of the upgrade" )
        ctrl = main.Cluster.next().CLI
        status = ctrl.issu()
        main.log.debug( status )

        # restart and reinstall old version on upgrade nodes
        for ctrl in main.kill:
            ctrl.onosStop( ctrl.ipAddress )
            ctrl.onosUninstall( ctrl.ipAddress )
            ctrl.onosInstall( options="-f", node=ctrl.ipAddress )
            ctrl.onosSecureSSH( node=ctrl.ipAddress )
            ctrl.startOnosCli( ctrl.ipAddress, waitForStart=True )
        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )
        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump every component that is not ACTIVE before giving up
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE64( self, main ):
        """
        Reset the upgrade state.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Reset the upgrade state" )

        # NOTE(review): the step text says "reset" while the command sent is
        #               issuCommit(); confirm that this is the intended call.
        main.step( "Send the command to reset the upgrade" )
        ctrl = main.Cluster.next().CLI
        committed = ctrl.issuCommit()
        utilities.assert_equals( expect=main.TRUE, actual=committed,
                                 onpass="Upgrade has been committed",
                                 onfail="Error committing the upgrade" )

        main.step( "Check the status of the upgrade" )
        ctrl = main.Cluster.next().CLI
        status = ctrl.issu()
        main.log.debug( status )
        # TODO: check things here?

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the generic state check, asks every active node for the
        election-app leader. The step fails if a node reports an error, no
        leader, a leader whose address belongs to a node in main.kill, or if
        the nodes disagree on the leader.
        """
        # main.kill only exists once CASE61 has run
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # Addresses of the nodes that were restarted
        restarted = [ node.ipAddress for node in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        # Every active node must report the same leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link end points are read from the 'kill' section of main.params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link end points are read from the 'kill' section of main.params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # NOTE(review): main.restartTime is not assigned anywhere in this
        #               file; presumably a main.HA helper sets it - confirm.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )