"""
Copyright 2017 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE102: Set up Spine-Leaf fabric topology in Mininet
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE104: Ping between all hosts
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link down ( endpoints taken from the 'kill' params )
CASE10: Link up ( endpoints taken from the 'kill' params )
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HAcontinuousStopNodes:
    """
    HA test cases that stop one ONOS node per iteration and verify the
    cluster afterwards.

    Every CASE method receives `main`, the test-framework object through
    which the case reaches the logger, the params, the cluster handle and
    the shared HA helper ( main.HA, created in CASE1 ).

    NOTE: `utilities` is not defined or imported in this file; it is
    presumably injected by the test framework before the cases run.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Stop a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Pre-set the result so that a failure inside the try block is
        # reported as a failed step instead of raising a NameError below.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE(review): the 'evn' spelling is kept on purpose; it has to
        # match the method name exposed by ONOSSetup - confirm there.
        main.testSetUp.evnSetupConclusion( stepResult )

        applyFuncs = [ main.HA.customizeOnosGenPartitions, main.HA.copyBackupConfig ]
        applyArgs = [ None, None ]
        # Mininet is started here unless the params name a topology file,
        # in which case another case is expected to start it.
        try:
            topoFile = main.params[ 'topology' ][ 'topoFile' ]
        except ( KeyError, IndexError ):
            topoFile = None
        if topoFile:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName, removeLog=True,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            # e.g. the 'ONOS_Logging' entry is not a mapping
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Stops the next node in round-robin order over
        main.Cluster.runningNodes, marks it inactive and then checks the
        cluster. The rotation state lives on main ( nodeIndex, killCount )
        so that repeated runs of this case walk through the nodes; the
        stopped node( s ) are published as main.kill.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        try:
            assert main.nodeIndex is not None, "main.nodeIndex not defined"
            assert main.killCount is not None, "main.killCount not defined"
        except AttributeError:
            # First iteration: start at -1 so the increment below selects
            # the first running node.
            main.log.warn( "Node to kill not selected, defaulting to node 1" )
            main.nodeIndex = -1
            main.killCount = 1

        main.case( "Stopping ONOS nodes - iteration " + str( main.killCount ) )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.runningNodes:
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( ctrl.ipAddress ) )

        # NOTE: For now only kill one node. If we move to killing more, we need to
        #       make sure we don't lose any partitions
        n = len( main.Cluster.runningNodes )  # Number of nodes
        main.nodeIndex = ( main.nodeIndex + 1 ) % n
        # ONOS node object( s ) to stop in this iteration
        main.kill = [ main.Cluster.runningNodes[ main.nodeIndex ] ]

        # TODO: Be able to configure bringing up old node vs. a new/fresh installation
        main.step( "Stopping " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for ctrl in main.kill:
            # Issue the stop before folding it into the result: a plain
            # `killResults and onosStop( ... )` would skip the stop of every
            # node after a failure while still marking it inactive.
            stopResult = main.ONOSbench.onosStop( ctrl.ipAddress )
            killResults = killResults and stopResult
            ctrl.active = False
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes stopped successfully",
                                 onfail="ONOS nodes NOT successfully stopped" )
        main.Cluster.reset()

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of every remaining node before
            # aborting the whole test run.
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        main.killCount += 1

    def CASE62( self, main ):
        """
        The bring up stopped nodes
        """
        main.HA.bringUpStoppedNodes( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, every active node is asked for the
        election-test leader; the step passes only if each node answers
        with a leader and all nodes report the same one.
        """
        # Make sure main.kill exists even if no failure case ran first
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )

        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.CLI.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
        # All active nodes must agree on exactly one leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link endpoints are read from the 'kill' section of the params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link endpoints are read from the 'kill' section of the params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up

        Records the restart time in the csv plotting data and then runs
        the shared clean up.
        """
        # NOTE(review): main.restartTime is never set in this file;
        # presumably an earlier case ( e.g. the bring-up helper used by
        # CASE62 ) sets it - confirm, otherwise this raises AttributeError.
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )