blob: 998f29cec07d057a64c2f4797e1362d8173dfb5b [file] [log] [blame]
"""
Copyright 2015 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAstopNodes:
    """
    TestON test cases checking that ONOS keeps working while a minority
    of its nodes is stopped.

    Every CASE method receives the TestON `main` object. Most cases simply
    delegate to the shared helper stored on `main.HA` ( created in CASE1 ).
    `utilities` is not imported in this file; it is expected to be provided
    by the test framework at run time.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Pre-set so the conclusion call below always has a bound value,
        # even when the setup raises before envSetup() returns.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evn" is how the method is spelled by the caller's API here;
        #       do not "fix" the name in this file alone.
        main.testSetUp.evnSetupConclusion( stepResult )

        applyFuncs = [ main.HA.customizeOnosGenPartitions, main.HA.copyBackupConfig ]
        applyArgs = [ None, None ]
        # Mininet is started here unless a topology file is configured, in
        # which case it is expected to be started by another case.
        try:
            topoFile = main.params[ 'topology' ][ 'topoFile' ]
        except ( KeyError, IndexError ):
            topoFile = None
        if topoFile:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            # e.g. the 'ONOS_Logging' entry is not a mapping
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Stops a minority of the running ONOS nodes, records them in
        main.kill, marks them inactive and then waits for the remaining
        nodes to pass the cluster node check. The test is aborted if the
        node check never succeeds.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Stop minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an int on both Python 2 and Python 3;
        # it is used as a list index below.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        # ONOS nodes to stop ( controller objects taken from runningNodes )
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        main.step( "Stopping nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Always issue the stop before combining results: a plain
            # "killResults and ctrl.onosStop( ... )" would skip the stop of
            # every later node once one stop failed, while the node would
            # still be flagged inactive below.
            stopResult = ctrl.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        main.Cluster.reset()
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=50 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the components that are not ACTIVE to help debugging
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "onos:scr-list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

    def CASE62( self, main ):
        """
        Bring the stopped nodes back up, then check the ONOS nodes
        """
        main.HA.bringUpStoppedNodes( main )
        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=50 )
        # Report the outcome of the step, mirroring the same check in CASE61;
        # without an assertion the computed result would be silently dropped.
        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the generic state check, every active node is asked for the
        election-app leader. The step fails if any node reports an error,
        reports no leader, reports a node listed in main.kill as leader,
        or if the nodes disagree on the leader.
        """
        # main.kill is only set by CASE61; default to "nothing was stopped"
        if not hasattr( main, 'kill' ):
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link endpoints are read from the 'kill' section of main.params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link endpoints are read from the 'kill' section of main.params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )