blob: 24d5bf883a9ab22a3606820ba70055fba459e453 [file] [log] [blame]
Jon Hall5cf14d52015-07-16 12:15:19 -07001"""
Jeremy Ronquillob27ce4c2017-07-17 12:41:28 -07002Copyright 2015 Open Networking Foundation (ONF)
3
4Please refer questions to either the onos test mailing list at <onos-test@onosproject.org>,
5the System Testing Plans and Results wiki page at <https://wiki.onosproject.org/x/voMg>,
6or the System Testing Guide page at <https://wiki.onosproject.org/x/WYQg>
7
8 TestON is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 2 of the License, or
11 (at your option) any later version.
12
13 TestON is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with TestON. If not, see <http://www.gnu.org/licenses/>.
20"""
21
22"""
Description: This test is to determine if ONOS can handle
    a minority of its nodes restarting
25
26List of test cases:
27CASE1: Compile ONOS and push it to the test machines
28CASE2: Assign devices to controllers
29CASE21: Assign mastership to controllers
30CASE3: Assign intents
31CASE4: Ping across added host intents
32CASE5: Reading state of ONOS
Jon Hallb3ed8ed2015-10-28 16:43:55 -070033CASE61: The Failure inducing case.
34CASE62: The Failure recovery case.
Jon Hall5cf14d52015-07-16 12:15:19 -070035CASE7: Check state after control plane failure
36CASE8: Compare topo
37CASE9: Link s3-s28 down
38CASE10: Link s3-s28 up
39CASE11: Switch down
40CASE12: Switch up
41CASE13: Clean up
42CASE14: start election app on all onos nodes
43CASE15: Check that Leadership Election is still functional
44CASE16: Install Distributed Primitives app
45CASE17: Check for basic functionality with distributed primitives
46"""
class HAkillNodes:
    """
    TestON test cases that kill a minority of the ONOS nodes in a cluster
    and then check the state of the remaining nodes.

    Most cases delegate to the shared helper object stored in main.HA
    (created in CASE1); CASE1, CASE61 and CASE7 contain the logic that is
    specific to this test.
    """

    def __init__( self ):
        self.default = ''

    def CASE1( self, main ):
        """
        CASE1 is to compile ONOS and push it to the test machines

        Startup sequence:
        cell <name>
        onos-verify-cell
        NOTE: temporary - onos-remove-raft-logs
        onos-uninstall
        start mininet
        git pull
        mvn clean install
        onos-package
        onos-install -f
        onos-wait-for-start
        start cli sessions
        start tcpdump
        """
        # NOTE(review): none of these four modules is referenced in this
        #               method; they are kept because removing them may
        #               affect how the harness runs later cases - confirm
        #               before deleting.
        import imp
        import pexpect
        import time
        import json
        main.log.info( "ONOS HA test: Restart a minority of ONOS nodes - " +
                       "initialization" )
        # set global variables
        # These are for csv plotting in jenkins
        main.HAlabels = []
        main.HAdata = []
        try:
            from tests.dependencies.ONOSSetup import ONOSSetup
            main.testSetUp = ONOSSetup()
        except ImportError:
            main.log.error( "ONOSSetup not found. exiting the test" )
            main.exit()
        main.testSetUp.envSetupDescription()
        # Default to failure so that the conclusion call below always has a
        # value to report, even if the setup raises before envSetup() returns
        stepResult = main.FALSE
        try:
            from tests.HA.dependencies.HA import HA
            main.HA = HA()
            # load some variables from the params file
            cellName = main.params[ 'ENV' ][ 'cellName' ]
            main.apps = main.params[ 'ENV' ][ 'appString' ]
            main.numCtrls = int( main.params[ 'num_controllers' ] )
            # Never use more controllers than the bench reports as available
            if main.ONOSbench.maxNodes and\
                    main.ONOSbench.maxNodes < main.numCtrls:
                main.numCtrls = int( main.ONOSbench.maxNodes )
            main.maxNodes = main.numCtrls
            stepResult = main.testSetUp.envSetup( hasNode=True )
        except Exception as e:
            main.testSetUp.envSetupException( e )
        # NOTE: "evnSetupConclusion" is the spelling this file calls; it is
        #       left untouched since the method is defined outside this file
        main.testSetUp.evnSetupConclusion( stepResult )
        main.HA.generateGraph( "HAkillNodes" )

        main.step( "Make sure ONOS service doesn't automatically respawn" )
        handle = main.ONOSbench.handle
        # Raw strings are used for the patterns so that "\$" reaches the
        # regex engine as an escaped, literal dollar sign
        handle.sendline( "sed -i -e 's/^respawn$/#respawn/g' tools/package/init/onos.conf" )
        handle.expect( r"\$" )  # $ from the command
        handle.sendline( "sed -i -e 's/^Restart=always/Restart=no/g' tools/package/init/onos.service" )
        # NOTE(review): the original comment here also said "$ from the
        #               command", but this sed expression contains no "$";
        #               presumably this consumes the prompt left by the
        #               previous command - confirm
        handle.expect( r"\$" )
        handle.expect( r"\$" )  # $ from the prompt

        main.testSetUp.ONOSSetUp( main.Mininet1, cellName=cellName, removeLog=True,
                                  extraApply=main.HA.customizeOnosGenPartitions,
                                  extraClean=main.HA.cleanUpGenPartition )

        main.HA.initialSetUp()

    def CASE2( self, main ):
        """
        Assign devices to controllers
        """
        main.HA.assignDevices( main )

    def CASE21( self, main ):
        """
        Assign mastership to controllers
        """
        main.HA.assignMastership( main )

    def CASE3( self, main ):
        """
        Assign intents
        """
        main.HA.assignIntents( main )

    def CASE4( self, main ):
        """
        Ping across added host intents
        """
        main.HA.pingAcrossHostIntent( main, True, False )

    def CASE5( self, main ):
        """
        Reading state of ONOS
        """
        main.HA.readingState( main )

    def CASE61( self, main ):
        """
        The Failure inducing case: kill a minority of the ONOS nodes.

        Stores the indexes ( into main.nodes ) of the killed nodes in
        main.kill, removes them from main.activeNodes and then verifies the
        remaining nodes with main.HA.nodesCheck. The test is stopped if that
        check keeps failing.
        """
        assert main, "main not defined"
        assert main.numCtrls, "main.numCtrls not defined"
        assert utilities.assert_equals, "utilities.assert_equals not defined"
        assert main.CLIs, "main.CLIs not defined"
        assert main.nodes, "main.nodes not defined"
        main.case( "Kill minority of ONOS nodes" )

        main.step( "Checking ONOS Logs for errors" )
        for node in main.nodes:
            main.log.debug( "Checking logs for errors on " + node.name + ":" )
            main.log.warn( main.ONOSbench.checkLogs( node.ip_address ) )

        n = len( main.nodes )  # Number of nodes
        # Floor division keeps p an integer on every Python version; p - 1
        # is used below as a list index
        p = ( ( n + 1 ) // 2 ) + 1  # Number of partitions
        main.kill = [ 0 ]  # ONOS node to kill, listed by index in main.nodes
        if n > 3:
            main.kill.append( p - 1 )
            # NOTE: This only works for cluster sizes of 3,5, or 7.

        main.step( "Kill " + str( len( main.kill ) ) + " ONOS nodes" )
        killResults = main.TRUE
        for i in main.kill:
            # Attempt the kill before combining results so that an earlier
            # failure cannot short-circuit it; the node is removed from
            # main.activeNodes below either way
            killed = main.ONOSbench.onosKill( main.nodes[ i ].ip_address )
            killResults = killResults and killed
            main.activeNodes.remove( i )
        utilities.assert_equals( expect=main.TRUE, actual=killResults,
                                 onpass="ONOS nodes killed successfully",
                                 onfail="ONOS nodes NOT successfully killed" )

        main.step( "Checking ONOS nodes" )
        nodeResults = utilities.retry( main.HA.nodesCheck,
                                       False,
                                       args=[ main.activeNodes ],
                                       sleep=15,
                                       attempts=5 )

        utilities.assert_equals( expect=True, actual=nodeResults,
                                 onpass="Nodes check successful",
                                 onfail="Nodes check NOT successful" )

        if not nodeResults:
            # Dump the non-ACTIVE components of every remaining node to help
            # debugging, then abort the test
            for i in main.activeNodes:
                cli = main.CLIs[ i ]
                main.log.debug( "{} components not ACTIVE: \n{}".format(
                    cli.name,
                    cli.sendline( "scr:list | grep -v ACTIVE" ) ) )
            main.log.error( "Failed to start ONOS, stopping test" )
            main.cleanup()
            main.exit()

    def CASE62( self, main ):
        """
        The Failure recovery case: bring up the stopped nodes
        """
        main.HA.bringUpStoppedNode( main )

    def CASE7( self, main ):
        """
        Check state after ONOS failure

        Also checks that every active node reports the same election-app
        leader and that this leader is not one of the nodes listed in
        main.kill.
        """
        # main.kill is set by CASE61; fall back to "nothing was killed" when
        # that case has not been run
        try:
            main.kill
        except AttributeError:
            main.kill = []

        main.HA.checkStateAfterONOS( main, afterWhich=0 )
        main.step( "Leadership Election is still functional" )
        # Test of LeadershipElection
        leaderList = []

        # IP addresses of the nodes that were killed
        restarted = [ main.nodes[ i ].ip_address for i in main.kill ]
        leaderResult = main.TRUE

        for i in main.activeNodes:
            cli = main.CLIs[ i ]
            leaderN = cli.electionTestLeader()
            leaderList.append( leaderN )
            if leaderN == main.FALSE:
                # error in response
                main.log.error( "Something is wrong with " +
                                "electionTestLeader function, check the" +
                                " error logs" )
                leaderResult = main.FALSE
            elif leaderN is None:
                main.log.error( cli.name +
                                " shows no leader for the election-app was" +
                                " elected after the old one died" )
                leaderResult = main.FALSE
            elif leaderN in restarted:
                main.log.error( cli.name + " shows " + str( leaderN ) +
                                " as leader for the election-app, but it " +
                                "was restarted" )
                leaderResult = main.FALSE
        if len( set( leaderList ) ) != 1:
            leaderResult = main.FALSE
            main.log.error(
                "Inconsistent view of leader for the election test app" )
            # Show what each active node reported to make the mismatch
            # easier to diagnose
            main.log.debug( "Leaders reported by the active nodes: " +
                            str( leaderList ) )
        utilities.assert_equals(
            expect=main.TRUE,
            actual=leaderResult,
            onpass="Leadership election passed",
            onfail="Something went wrong with Leadership election" )

    def CASE8( self, main ):
        """
        Compare topo
        """
        main.HA.compareTopo( main )

    def CASE9( self, main ):
        """
        Link s3-s28 down
        """
        main.HA.linkDown( main )

    def CASE10( self, main ):
        """
        Link s3-s28 up
        """
        main.HA.linkUp( main )

    def CASE11( self, main ):
        """
        Switch Down
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchDown( main )

    def CASE12( self, main ):
        """
        Switch Up
        """
        # NOTE: You should probably run a topology check after this
        main.HA.switchUp( main )

    def CASE13( self, main ):
        """
        Clean up
        """
        # Record the restart time for the csv plotting lists set up in CASE1
        # NOTE(review): main.restartTime is not assigned anywhere in this
        #               file; presumably it is set by a main.HA helper -
        #               confirm
        main.HAlabels.append( "Restart" )
        main.HAdata.append( str( main.restartTime ) )
        main.HA.cleanUp( main )

    def CASE14( self, main ):
        """
        start election app on all onos nodes
        """
        main.HA.startElectionApp( main )

    def CASE15( self, main ):
        """
        Check that Leadership Election is still functional
            15.1 Run election on each node
            15.2 Check that each node has the same leaders and candidates
            15.3 Find current leader and withdraw
            15.4 Check that a new node was elected leader
            15.5 Check that the new leader was the candidate of old leader
            15.6 Run for election on old leader
            15.7 Check that oldLeader is a candidate, and leader if only 1 node
            15.8 Make sure that the old leader was added to the candidate list

            old and new variable prefixes refer to data from before vs after
                withdrawal and later before withdrawal vs after re-election
        """
        main.HA.isElectionFunctional( main )

    def CASE16( self, main ):
        """
        Install Distributed Primitives app
        """
        main.HA.installDistributedPrimitiveApp( main )

    def CASE17( self, main ):
        """
        Check for basic functionality with distributed primitives
        """
        main.HA.checkDistPrimitivesFunc( main )