blob: e8fbe6175593d14af774cfbcd4d2410a9a12b870 [file] [log] [blame]
"""
Copyright 2018 Open Networking Foundation ( ONF )

Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>

    TestON is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    ( at your option ) any later version.

    TestON is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with TestON.  If not, see <http://www.gnu.org/licenses/>.
"""
"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting

List of test cases:
CASE1: Compile ONOS and push it to the test machines
CASE2: Assign devices to controllers
CASE21: Assign mastership to controllers
CASE3: Assign intents
CASE4: Ping across added host intents
CASE5: Reading state of ONOS
CASE61: The Failure inducing case.
CASE62: The Failure recovery case.
CASE7: Check state after control plane failure
CASE8: Compare topo
CASE9: Link s3-s28 down
CASE10: Link s3-s28 up
CASE11: Switch down
CASE12: Switch up
CASE13: Clean up
CASE14: start election app on all onos nodes
CASE15: Check that Leadership Election is still functional
CASE16: Install Distributed Primitives app
CASE17: Check for basic functionality with distributed primitives
"""
class HApowerFailure:
    """
    TestON test class that simulates a power failure on a minority of
    ONOS nodes and then checks the cluster afterwards.

    Every CASE method receives the TestON `main` object. Most cases are
    thin wrappers that delegate to the shared helper stored on `main.HA`
    ( created in CASE1 ).
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        main.log.info( "ONOS HA test: Simulate a power failure on a minority of ONOS nodes - " +
                       "initialization" )
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.cleanAndExit()
        main.testSetUp.envSetupDescription()
        # Pre-bind the result so the conclusion call below never sees an
        # unbound name if the environment setup raises part way through.
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            stepResult = main.testSetUp.envSetup( includeCaseDesc=False )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evnSetupConclusion" is the name this code has always called
        #       on the setup helper; it is kept as is on purpose.
        main.testSetUp.evnSetupConclusion( stepResult )

        # Extra functions handed to ONOSSetUp, with one argument entry
        # per function ( None for each of them here ).
        applyFuncs = [ main.HA.customizeOnosGenPartitions,
                       main.HA.copyBackupConfig,
                       main.ONOSbench.preventAutoRespawn ]
        applyArgs = [ None, None, None ]
        # Mininet is only started here when the params do not name a
        # topology file; a missing 'topology' section counts as "not named".
        try:
            externalTopo = bool( main.params[ 'topology' ][ 'topoFile' ] )
        except ( KeyError, IndexError ):
            externalTopo = False
        if externalTopo:
            main.log.info( 'Skipping start of Mininet in this case, make sure you start it elsewhere' )
        else:
            applyFuncs.append( main.HA.startingMininet )
            applyArgs.append( None )

        main.testSetUp.ONOSSetUp( main.Cluster, cellName=cellName,
                                  extraApply=applyFuncs,
                                  applyArgs=applyArgs,
                                  extraClean=main.HA.cleanUpGenPartition,
                                  includeCaseDesc=False )
        main.HA.initialSetUp( serviceClean=True )

        main.step( 'Set logging levels' )
        logLevelsSet = True
        try:
            logs = main.params.get( 'ONOS_Logging', False )
            if logs:
                for namespace, level in logs.items():
                    for ctrl in main.Cluster.active():
                        ctrl.CLI.logSet( level, namespace )
        except AttributeError:
            # An AttributeError anywhere above is reported as a failed step
            logLevelsSet = False
        utilities.assert_equals( expect=True, actual=logLevelsSet,
                                 onpass="Set log levels",
                                 onfail="Failed to set log levels" )

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE102( self, main ):
        """
        Set up Spine-Leaf fabric topology in Mininet
        """
        main.HA.startTopology( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main )

    def CASE104( self, main ):
        """
        Ping Hosts

        Runs a Mininet pingall and asserts that it returned main.TRUE.
        """
        main.case( "Check connectivity" )
        main.step( "Ping between all hosts" )
        pingResult = main.Mininet1.pingall()
        utilities.assert_equals( expect=main.TRUE, actual=pingResult,
                                 onpass="All Pings Passed",
                                 onfail="Failed to ping between all hosts" )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure case.

        Picks the nodes to kill ( stored in main.kill ), powers them off
        through main.ONOSbench.onosPower, marks them inactive and then
        checks that the remaining nodes pass main.Cluster.nodesCheck.
        The test is stopped if that check keeps failing.
        """
        assert main, "main not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        main.case( "Simulate a power failure on a minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for ctrl in main.Cluster.active():
            main.log.debug( "Checking logs for errors on " + ctrl.name + ":" )
            main.log.warn( ctrl.checkLogs( ctrl.ipAddress ) )

        # ONOS nodes to kill; the first running node is always included
        main.kill = [ main.Cluster.runningNodes[ 0 ] ]
        n = len( main.Cluster.runningNodes )  # Number of nodes
        # Floor division keeps p an int so it can be used as a list index
        # under Python 3 as well as Python 2.
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        if n > 3:
            main.kill.append( main.Cluster.runningNodes[ p - 1 ] )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        # NOTE: This is to fix an issue with wiki formating
        nodeNames = [ node.name for node in main.kill ]
        # Set the env variables so we actually use the warden power ON/OFF functionality
        # NOTE: Only works with warden
        main.ONOSbench.setEnv( "HARD_POWER_OFF", "True" )
        main.ONOSbench.setEnv( "ONOS_CELL", "borrow" )
        main.step( "Killing nodes: " + str( nodeNames ) )
        killResults = main.TRUE
        userName = main.params[ 'cell' ][ 'user' ]
        for ctrl in main.kill:
            # Always issue the power off, even after an earlier failure, so
            # that a node is never marked inactive without being powered off.
            powerResult = main.ONOSbench.onosPower( ctrl.ipAddress, "off", userName )
            killResults = killResults and powerResult
            ctrl.active = False
        main.Cluster.reset()
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.Cluster.nodesCheck,
                                       False,
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            for ctrl in main.Cluster.active():
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    ctrl.name,
                    ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanAndExit()

        # Issue 99 summary and 99 partitions calls, each on whatever node
        # main.Cluster.next() hands back.
        for _ in range( 1, 100 ):
            main.Cluster.next().summary()
        for _ in range( 1, 100 ):
            main.Cluster.next().partitions()
        for ctrl in main.Cluster.active():
            main.log.warn( repr( ctrl ) )

    def CASE62( self, main ):
        """
        Bring up the stopped nodes

        Powers on every node in main.kill, reconnects its CLI and server
        components and then hands over to main.HA.bringUpStoppedNodes.
        Expects main.kill to have been set already ( CASE61 sets it ).
        """
        userName = main.params[ 'cell' ][ 'user' ]
        # NOTE: The warden will actually power up in reverse alphabetical order of container
        #       names in a cell, ignoring the ip given.
        for ctrl in main.kill:
            main.ONOSbench.onosPower( ctrl.ipAddress, "on", userName )
            for component in [ ctrl.CLI, ctrl.server ]:
                component.connect()
        main.HA.bringUpStoppedNodes( main )
        for ctrl in main.Cluster.active():
            main.log.warn( repr( ctrl ) )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        After the shared state check, asks every active node for the
        election-app leader. The step fails if a node reports an error,
        reports no leader, reports one of the restarted nodes as leader,
        or if the nodes disagree with each other.
        """
        # main.kill may not exist if the failure case was not run
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterEvent( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []
        restarted = [ ctrl.ipAddress for ctrl in main.kill ]
        leaderResult = main.TRUE

        for ctrl in main.Cluster.active():
            leaderN = ctrl.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( ctrl.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( ctrl.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        # Every active node must report the same single leader
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            main.log.debug( leaderList )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link down

        The link endpoints are read from the 'kill' section of the params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkDown( main, src, dst )

    def CASE10( self, main ):
        """
        Link up

        The link endpoints are read from the 'kill' section of the params.
        """
        src = main.params[ 'kill' ][ 'linkSrc' ]
        dst = main.params[ 'kill' ][ 'linkDst' ]
        main.HA.linkUp( main, src, dst )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        Start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that that new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )
375 main.HA.checkDistPrimitivesFunc( main )