blob: 8b06dbd29a62d4216182de633784b019644ecd1e [file] [log] [blame]
"""
Description: This test is to determine if ONOS can handle
    all of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE6: The Failure case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAclusterRestart:
    """
    Test cases for restarting every ONOS node in the cluster.

    Each CASE method receives the framework's `main` object. Most cases
    simply delegate to the shared helper stored on `main.HA` (created in
    CASE1); CASE6 and CASE7 hold the logic specific to this test.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Restart all ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so the conclusion step below always has a value,
        # even if the setup raised before envSetup() returned.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            # Never use more controllers than the bench reports available
            if main.ONOSbench.maxNodes and \
                    main.ONOSbench.maxNodes < main.numCtrls:
                main.numCtrls = int( main.ONOSbench.maxNodes )
            main.maxNodes = main.numCtrls
            stepResult = main.testSetUp.envSetup( hasNode=True )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAclusterRestart" )

        main.testSetUp.ONOSSetUp( main.Mininet1, cellName=cellName, removeLog=True,
                                  extraApply=main.HA.startingMininet )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main, True, True )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE6( self, main ):
        """
        The Failure case.

        Kills every node in main.nodes, waits for them to come back up,
        restarts the CLI sessions, waits until the `summary` command
        returns output on every active node, records how long the restart
        took in main.HAlabels / main.HAdata, and finally reruns the
        election app on every active node.

        Exits the test (main.cleanup() + main.exit()) if the CLIs never
        answer the `summary` command.
        """
        import time
        # Check `main` itself before dereferencing any of its attributes
        assert main, "main not defined"
        assert main.numCtrls, "main.numCtrls not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.CLIs, "main.CLIs not defined"
        assert main.nodes, "main.nodes not defined"
        # The plotting lists are normally created in CASE1; recreate them if
        # this case is run without it.
        try:
            main.HAlabels
        except ( NameError, AttributeError ):
            main.log.error( "main.HAlabels not defined, setting to []" )
            main.HAlabels = []
        try:
            main.HAdata
        except ( NameError, AttributeError ):
            main.log.error( "main.HAdata not defined, setting to []" )
            main.HAdata = []

        main.case( "Restart entire ONOS cluster" )

        main.step( "Checking ONOS Logs for errors" )
        for node in main.nodes:
            main.log.debug( "Checking logs for errors on " + node.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( node.ip_address ) )

        main.step( "Killing ONOS nodes" )
        killResults = main.TRUE
        # Reference point for the restart duration computed further down
        killTime = time.time()
        for node in main.nodes:
            killed = main.ONOSbench.onosKill( node.ip_address )
            killResults = killResults and killed
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed",
                                 onfail="ONOS kill unsuccessful" )

        main.step( "Checking if ONOS is up yet" )
        # Give the whole cluster two chances to report as up
        for _attempt in range( 2 ):
            onosIsupResult = main.TRUE
            for node in main.nodes:
                started = main.ONOSbench.isup( node.ip_address )
                if not started:
                    main.log.error( node.name + " didn't start!" )
                onosIsupResult = onosIsupResult and started
            if onosIsupResult == main.TRUE:
                break
        utilities.assert_equals( expect=main.TRUE, actual=onosIsupResult,
                                 onpass="ONOS restarted",
                                 onfail="ONOS restart NOT successful" )

        main.step( "Starting ONOS CLI sessions" )
        cliResults = main.TRUE
        threads = []
        # Start all CLI sessions in parallel, one thread per controller
        for i in range( main.numCtrls ):
            t = main.Thread( target=main.CLIs[ i ].startOnosCli,
                             name="startOnosCli-" + str( i ),
                             args=[ main.nodes[ i ].ip_address ] )
            threads.append( t )
            t.start()

        for t in threads:
            t.join()
            cliResults = cliResults and t.result
        utilities.assert_equals( expect=main.TRUE, actual=cliResults,
                                 onpass="ONOS cli started",
                                 onfail="ONOS clis did not restart" )

        # Poll until every active node answers `summary`, retrying up to
        # 10 times with 30 seconds between attempts.
        for _attempt in range( 10 ):
            ready = True
            for ctrl in main.activeNodes:
                cli = main.CLIs[ ctrl ]
                output = cli.summary()
                if not output:
                    ready = False
            if ready:
                break
            time.sleep( 30 )
        utilities.assert_equals( expect=True, actual=ready,
                                 onpass="ONOS summary command succeded",
                                 onfail="ONOS summary command failed" )
        if not ready:
            main.cleanup()
            main.exit()

        # Grab the time of restart so we can check how long the gossip
        # protocol has had time to work
        main.restartTime = time.time() - killTime
        main.log.debug( "Restart time: " + str( main.restartTime ) )
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )

        # Rerun for election on restarted nodes
        runResults = main.TRUE
        for ctrl in main.activeNodes:
            cli = main.CLIs[ ctrl ]
            run = cli.electionTestRun()
            if run != main.TRUE:
                main.log.error( "Error running for election on " + cli.name )
            runResults = runResults and run
        utilities.assert_equals( expect=main.TRUE, actual=runResults,
                                 onpass="Reran for election",
                                 onfail="Failed to rerun for election" )

        # TODO: Make this configurable
        time.sleep( 60 )
        # Dump the cluster view from the first active node for debugging
        node = main.activeNodes[ 0 ]
        main.log.debug( main.CLIs[ node ].nodes( jsonFormat=False ) )
        main.log.debug( main.CLIs[ node ].leaders( jsonFormat=False ) )
        main.log.debug( main.CLIs[ node ].partitions( jsonFormat=False ) )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Runs the shared post-failure state check, then verifies that every
        active node reports the same, valid leader for the election app.
        """
        # NOTE: Store has no durability, so intents are lost across system
        # restarts
        main.HA.checkStateAfterONOS( main, afterWhich=0, isRestart=True )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []
        leaderResult = main.TRUE

        for i in main.activeNodes:
            cli = main.CLIs[ i ]
            leaderN = cli.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( cli.name +
                                " shows no leader for the election-app." )
                leaderResult = main.FALSE
        # Every node must agree on a single leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each node reported to make the mismatch debuggable
            main.log.debug( "Leaders reported by each node: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that the new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )