blob: 19b0ec379515a9189ea930473cc53f2e33ca8db9 [file] [log] [blame]
"""
Copyright 2016 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
             dynamic scaling of the cluster size.

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE102: Set up Spine-Leaf fabric topology in Mininet
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE104: Ping between all hosts
CASE5: Reading state of ONOS
CASE6: The scaling case.
CASE7: Check state after ONOS scaling
CASE8: Compare topo
CASE9: Link down
CASE10: Link up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: Start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAscaling:
    """
    Test cases checking that ONOS can handle dynamic scaling of the
    cluster size.

    Every CASE method receives the test framework's `main` object. Most
    cases simply delegate to the shared helper stored on main.HA ( created
    in CASE1 ); CASE1 prepares the environment and CASE6 performs one
    scaling step.

    NOTE(review): `utilities` is used below without being imported in this
    file; presumably the test framework injects it - confirm before
    restructuring any case body.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump

        Also reads the comma separated 'scaling' parameter, uses its first
        entry as the initial number of running nodes and keeps the
        remaining entries in main.scaling for CASE6.
        """
        import re
        main.log.info( "ONOS HA test: Restart all ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        main.Cluster.numCtrls = 1
        # Default to a failing result so that the conclusion call below
        # never hits an unbound name when setup raises before envSetup()
        # returns
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.evnSetupConclusion( stepResult )

        main.scaling = main.params[ 'scaling' ].split( "," )
        main.log.debug( main.scaling )
        scale = main.scaling.pop( 0 )
        main.log.debug( scale )
        # Only the first run of digits in the entry is used as the size
        main.Cluster.setRunningNode( int( re.search( r"\d+", scale ).group( 0 ) ) )

        applyFuncs = [ main.HA.removeKarafConsoleLogging ]
        applyArgs = [ None ]
        try:
            if main.params[ 'topology' ][ 'topoFile' ]:
                main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
            else:
                applyFuncs.append( main.HA.startingMininet )
                applyArgs.append( None )
        except ( KeyError, IndexError ):
            # No topology file configured, so Mininet is started here
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  installMax=True,
                                  atomixClusterSize=3,
                                  includeCaseDesc=False )
        main.HA.initialSetUp()

        main.step( 'Set logging levels' )
        logging = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            logging = False
        utilities.assert_equals( expect=True, actual=logging,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers

        Delegates to main.HA.assignDevices().
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet

        Delegates to main.HA.startTopology().
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers

        Delegates to main.HA.assignMastership().
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents

        Delegates to main.HA.assignIntents().
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents

        Delegates to main.HA.pingAcrossHostIntent().
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts

        Runs main.Mininet1.pingall() and asserts that it returned main.TRUE.
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS

        Delegates to main.HA.readingState().
        """
        main.HA.readingState( main )

    def CASE6( self, main ):
        """
        The Scaling case.

        Pops the next entry from main.scaling ( filled in by CASE1 ) and
        changes the number of running nodes accordingly:
            - checks the logs of the currently active nodes
            - starts the nodes that are new and stops the ones that are no
              longer part of the cluster
            - sets up ssh, checks the ONOS service and restarts the CLIs
            - waits for the nodes check and the summary command to succeed
            - reruns the election app on all active nodes
            - runs main.HA.commonChecks()
        Exits the test through main.cleanAndExit() when main.scaling is
        empty or when the summary command never succeeds.
        """
        import time
        import re
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        try:
            main.HAlabels
        except ( NameError, AttributeError ):
            main.log.error( "main.HAlabels not defined, setting to []" )
            main.HAlabels = []
        try:
            main.HAdata
        except ( NameError, AttributeError ):
            main.log.error( "main.HAdata not defined, setting to []" )
            main.HAdata = []

        main.case( "Scale the number of nodes in the ONOS cluster" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        """
        pop # of nodes from a list, might look like 1,3,5,7,5,3...
        install/deactivate node as needed
        """
        try:
            prevNodes = main.Cluster.getRunningPos()
            prevSize = main.Cluster.numCtrls
            scale = main.scaling.pop( 0 )
            main.Cluster.setRunningNode( int( re.search( r"\d+", scale ).group( 0 ) ) )
            main.step( "Scaling from {} to {} nodes".format(
                prevSize, main.Cluster.numCtrls ) )
        except IndexError as e:
            # Nothing left in main.scaling, there is no further size to try
            main.log.debug( e )
            main.cleanAndExit()

        activeNodes = range( 0, main.Cluster.numCtrls )
        newNodes = [ x for x in activeNodes if x not in prevNodes ]
        deadNodes = [ x for x in prevNodes if x not in activeNodes ]
        main.Cluster.clearActive()
        main.step( "Start new nodes" )  # OR stop old nodes?
        started = main.TRUE
        stopped = main.TRUE
        for i in newNodes:
            main.log.debug( "Starting " + str( main.Cluster.runningNodes[ i ].ipAddress ) )
            started = main.ONOSbench.onosStart( main.Cluster.runningNodes[ i ].ipAddress ) and started
        utilities.assert_equals( expect=main.TRUE, actual=started,
                                 onpass="ONOS started",
                                 onfail="ONOS start NOT successful" )
        for i in deadNodes:
            # Stopped nodes are looked up in the full controller list
            main.log.debug( "Stopping " + str( main.Cluster.controllers[ i ].ipAddress ) )
            stopped = main.ONOSbench.onosStop( main.Cluster.controllers[ i ].ipAddress ) and stopped
        utilities.assert_equals( expect=main.TRUE, actual=stopped,
                                 onpass="ONOS stopped",
                                 onfail="ONOS stop NOT successful" )

        main.testSetUp.setupSsh( main.Cluster )

        main.testSetUp.checkOnosService( main.Cluster )

        main.Cluster.startCLIs()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       attempts=90 )
        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        # Up to 10 rounds, 30 seconds apart, until every active node
        # answers the summary command
        for i in range( 10 ):
            ready = True
            for ctrl in main.Cluster.active():
                output = ctrl.CLI.summary()
                if not output:
                    ready = False
            if ready:
                break
            time.sleep( 30 )
        utilities.assert_equals( expect=True, actual=ready,
                                 onpass="ONOS summary command succeded",
                                 onfail="ONOS summary command failed" )
        if not ready:
            main.cleanAndExit()

        # Rerun for election on new nodes
        runResults = main.TRUE
        for ctrl in main.Cluster.active():
            run = ctrl.CLI.electionTestRun()
            if run != main.TRUE:
                main.log.error( "Error running for election on " + ctrl.name )
            runResults = runResults and run
        utilities.assert_equals( expect=main.TRUE, actual=runResults,
                                 onpass="Reran for election",
                                 onfail="Failed to rerun for election" )

        main.HA.commonChecks()

    def CASE7( self, main ):
        """
        Check state after ONOS scaling

        Runs main.HA.checkStateAfterEvent() and then verifies that every
        active node reports the same, valid leader for the election app.
        """
        main.HA.checkStateAfterEvent( main, afterWhich=1 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app." )
                leaderResult = main.FALSE
        # All nodes must agree on a single leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo

        Delegates to main.HA.compareTopo().
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link end points are read from the 'linkSrc' and 'linkDst'
        entries of the 'kill' section of the params file.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link end points are read from the 'linkSrc' and 'linkDst'
        entries of the 'kill' section of the params file.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down

        Delegates to main.HA.switchDown().
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up

        Delegates to main.HA.switchUp().
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Delegates to main.HA.cleanUp().
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes

        Delegates to main.HA.startElectionApp().
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that the new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election

        Delegates to main.HA.isElectionFunctional().
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app

        Delegates to main.HA.installDistributedPrimitiveApp().
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives

        Delegates to main.HA.checkDistPrimitivesFunc().
        """
        main.HA.checkDistPrimitivesFunc( main )