blob: cf94aa95af35606e9f23004fb2d7fd690a36c9db [file] [log] [blame]
"""
Copyright 2018 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE102: Set up Spine-Leaf fabric topology in Mininet
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE104: Ping between all hosts
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HApowerFailure:
    """
    TestON cases that simulate a power failure on a minority of ONOS nodes.

    Most cases simply delegate to the shared helper object stored on
    main.HA ( created in CASE1 ). The cases specific to this test are:
        CASE61 - power off a minority of the cluster nodes
        CASE62 - power the stopped nodes back on
        CASE7  - check cluster state and leadership after the failure

    Every CASE method receives the TestON `main` object as its only argument
    and reports results through main.log / utilities.assert_equals rather
    than through a return value.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Simulate a power failure on a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        main.testSetUp.envSetupConclusion( stepResult )

        # Extra functions handed to ONOSSetUp, each paired by position with
        # an entry in applyArgs ( None is passed for every one of them here ).
        applyFuncs = [ main.HA.removeKarafConsoleLogging,
                       main.HA.customizeOnosGenPartitions,
                       main.HA.copyBackupConfig,
                       main.ONOSbench.preventAutoRespawn ]
        applyArgs = [ None, None, None, None ]
        # Mininet is only started here when no topology file is configured;
        # a missing 'topology' / 'topoFile' param is treated the same way.
        try:
            if main.params[ 'topology' ][ 'topoFile' ]:
                main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
            else:
                applyFuncs.append( main.HA.startingMininet )
                applyArgs.append( None )
        except ( KeyError, IndexError ):
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                # Apply every configured namespace/level pair on each active node
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Powers off a minority of the ONOS nodes ( the first running node, plus
        a second one when the cluster has more than 3 nodes ), stores them in
        main.kill for the later cases, marks them inactive and then checks the
        remaining nodes. The test is stopped via main.cleanAndExit() when the
        node check reports a failure.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Simulate a power failure on a minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        main.kill = [ main.Cluster.runningNodes[ 0 ] ]  # ONOS node to kill, listed by index in main.nodes
        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an integer on both Python 2 and Python 3,
        # since it is used as a list index below.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formatting
        nodeNames = [ node.name for node in main.kill ]
        # Set the env variables so we actually use the warden power ON/OFF functionality
        # NOTE: Only works with warden
        main.ONOSbench.setEnv( "HARD_POWER_OFF", "True" )
        main.ONOSbench.setEnv( "ONOS_CELL", "borrow" )
        main.step( "Killing nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        userName = main.params[ 'cell' ][ 'user' ]
        for ctrl in main.kill:
            # Always issue the power-off before combining results: a previous
            # failure must not short-circuit the power-off of the remaining
            # nodes, because every node in main.kill is marked inactive below.
            powerResult = main.ONOSbench.onosPower( ctrl.ipAddress, "off", userName )
            killResults = killResults and powerResult
            ctrl.active = False
        main.Cluster.reset()
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        # Keep the result of the node check; it was previously discarded, so
        # the check below failed with a NameError on every run.
        # NOTE(review): assumes checkOnosNodes returns the check result
        # ( truthy on success ) - confirm against ONOSSetup.
        nodeResults = main.testSetUp.checkOnosNodes( main.Cluster )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "onos:scr-list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        # Issue 99 summary and 99 partitions calls, each on whichever node
        # main.Cluster.next() hands out.
        for _ in range( 1, 100 ):
            main.Cluster.next().summary()
        for _ in range( 1, 100 ):
            main.Cluster.next().partitions()
        for ctrl in main.Cluster.active():
            main.log.warn( repr( ctrl ) )

    def CASE62( self, main ):
        """
        The bring up stopped nodes

        Powers on every node stored in main.kill, reconnects its CLI and
        server components, then hands over to main.HA.bringUpStoppedNodes.
        """
        userName = main.params[ 'cell' ][ 'user' ]
        # NOTE: The warden will actually power up in reverse alphabetical order of container
        #       names in a cell, ignoring the ip given.
        for ctrl in main.kill:
            main.ONOSbench.onosPower( ctrl.ipAddress, "on", userName )
            for component in [ ctrl.CLI, ctrl.server ]:
                component.connect()
        main.HA.bringUpStoppedNodes( main )
        for ctrl in main.Cluster.active():
            main.log.warn( repr( ctrl ) )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the generic state check, asks every active node for the leader
        of the election test app and fails the step when a node returns an
        error, reports no leader, reports one of the nodes in main.kill as
        leader, or when the nodes disagree about who the leader is.
        """
        # main.kill only exists once CASE61 has run; default to "nothing killed"
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were killed; none of them should be leader
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        # Every active node must report the same single leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link end points are read from the 'kill' section of the params
        ( linkSrc / linkDst ).
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link end points are read from the 'kill' section of the params
        ( linkSrc / linkDst ).
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )